1 //==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file Break False Dependency pass. 10 /// 11 /// Some instructions have false dependencies which cause unnecessary stalls. 12 /// For example, instructions may write part of a register and implicitly 13 /// need to read the other parts of the register. This may cause unwanted 14 /// stalls preventing otherwise unrelated instructions from executing in 15 /// parallel in an out-of-order CPU. 16 /// This pass is aimed at identifying and avoiding these dependencies. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "llvm/CodeGen/LivePhysRegs.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/ReachingDefAnalysis.h" 23 #include "llvm/CodeGen/RegisterClassInfo.h" 24 #include "llvm/CodeGen/TargetInstrInfo.h" 25 #include "llvm/InitializePasses.h" 26 #include "llvm/MC/MCInstrDesc.h" 27 #include "llvm/MC/MCRegister.h" 28 #include "llvm/MC/MCRegisterInfo.h" 29 #include "llvm/Support/Debug.h" 30 31 using namespace llvm; 32 33 namespace llvm { 34 35 class BreakFalseDeps : public MachineFunctionPass { 36 private: 37 MachineFunction *MF; 38 const TargetInstrInfo *TII; 39 const TargetRegisterInfo *TRI; 40 RegisterClassInfo RegClassInfo; 41 42 /// List of undefined register reads in this block in forward order. 43 std::vector<std::pair<MachineInstr *, unsigned>> UndefReads; 44 45 /// Storage for register unit liveness. 46 LivePhysRegs LiveRegSet; 47 48 ReachingDefAnalysis *RDA; 49 50 public: 51 static char ID; // Pass identification, replacement for typeid 52 53 BreakFalseDeps() : MachineFunctionPass(ID) { 54 initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry()); 55 } 56 57 void getAnalysisUsage(AnalysisUsage &AU) const override { 58 AU.setPreservesAll(); 59 AU.addRequired<ReachingDefAnalysis>(); 60 MachineFunctionPass::getAnalysisUsage(AU); 61 } 62 63 bool runOnMachineFunction(MachineFunction &MF) override; 64 65 MachineFunctionProperties getRequiredProperties() const override { 66 return MachineFunctionProperties().set( 67 MachineFunctionProperties::Property::NoVRegs); 68 } 69 70 private: 71 /// Process he given basic block. 72 void processBasicBlock(MachineBasicBlock *MBB); 73 74 /// Update def-ages for registers defined by MI. 75 /// Also break dependencies on partial defs and undef uses. 76 void processDefs(MachineInstr *MI); 77 78 /// Helps avoid false dependencies on undef registers by updating the 79 /// machine instructions' undef operand to use a register that the instruction 80 /// is truly dependent on, or use a register with clearance higher than Pref. 81 /// Returns true if it was able to find a true dependency, thus not requiring 82 /// a dependency breaking instruction regardless of clearance. 83 bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, 84 unsigned Pref); 85 86 /// Return true to if it makes sense to break dependence on a partial 87 /// def or undef use. 88 bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref); 89 90 /// Break false dependencies on undefined register reads. 91 /// Walk the block backward computing precise liveness. This is expensive, so 92 /// we only do it on demand. Note that the occurrence of undefined register 93 /// reads that should be broken is very rare, but when they occur we may have 94 /// many in a single block. 95 void processUndefReads(MachineBasicBlock *); 96 }; 97 98 } // namespace llvm 99 100 #define DEBUG_TYPE "break-false-deps" 101 102 char BreakFalseDeps::ID = 0; 103 INITIALIZE_PASS_BEGIN(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false) 104 INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) 105 INITIALIZE_PASS_END(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false) 106 107 FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); } 108 109 bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, 110 unsigned Pref) { 111 112 // We can't change tied operands. 113 if (MI->isRegTiedToDefOperand(OpIdx)) 114 return false; 115 116 MachineOperand &MO = MI->getOperand(OpIdx); 117 assert(MO.isUndef() && "Expected undef machine operand"); 118 119 // We can't change registers that aren't renamable. 120 if (!MO.isRenamable()) 121 return false; 122 123 MCRegister OriginalReg = MO.getReg().asMCReg(); 124 125 // Update only undef operands that have reg units that are mapped to one root. 126 for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) { 127 unsigned NumRoots = 0; 128 for (MCRegUnitRootIterator Root(*Unit, TRI); Root.isValid(); ++Root) { 129 NumRoots++; 130 if (NumRoots > 1) 131 return false; 132 } 133 } 134 135 // Get the undef operand's register class 136 const TargetRegisterClass *OpRC = 137 TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF); 138 139 // If the instruction has a true dependency, we can hide the false depdency 140 // behind it. 141 for (MachineOperand &CurrMO : MI->operands()) { 142 if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() || 143 !OpRC->contains(CurrMO.getReg())) 144 continue; 145 // We found a true dependency - replace the undef register with the true 146 // dependency. 147 MO.setReg(CurrMO.getReg()); 148 return true; 149 } 150 151 // Go over all registers in the register class and find the register with 152 // max clearance or clearance higher than Pref. 153 unsigned MaxClearance = 0; 154 unsigned MaxClearanceReg = OriginalReg; 155 ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC); 156 for (MCPhysReg Reg : Order) { 157 unsigned Clearance = RDA->getClearance(MI, Reg); 158 if (Clearance <= MaxClearance) 159 continue; 160 MaxClearance = Clearance; 161 MaxClearanceReg = Reg; 162 163 if (MaxClearance > Pref) 164 break; 165 } 166 167 // Update the operand if we found a register with better clearance. 168 if (MaxClearanceReg != OriginalReg) 169 MO.setReg(MaxClearanceReg); 170 171 return false; 172 } 173 174 bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, 175 unsigned Pref) { 176 MCRegister Reg = MI->getOperand(OpIdx).getReg().asMCReg(); 177 unsigned Clearance = RDA->getClearance(MI, Reg); 178 LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); 179 180 if (Pref > Clearance) { 181 LLVM_DEBUG(dbgs() << ": Break dependency.\n"); 182 return true; 183 } 184 LLVM_DEBUG(dbgs() << ": OK .\n"); 185 return false; 186 } 187 188 void BreakFalseDeps::processDefs(MachineInstr *MI) { 189 assert(!MI->isDebugInstr() && "Won't process debug values"); 190 191 const MCInstrDesc &MCID = MI->getDesc(); 192 193 // Break dependence on undef uses. Do this before updating LiveRegs below. 194 // This can remove a false dependence with no additional instructions. 195 for (unsigned i = MCID.getNumDefs(), e = MCID.getNumOperands(); i != e; ++i) { 196 MachineOperand &MO = MI->getOperand(i); 197 if (!MO.isReg() || !MO.getReg() || !MO.isUse() || !MO.isUndef()) 198 continue; 199 200 unsigned Pref = TII->getUndefRegClearance(*MI, i, TRI); 201 if (Pref) { 202 bool HadTrueDependency = pickBestRegisterForUndef(MI, i, Pref); 203 // We don't need to bother trying to break a dependency if this 204 // instruction has a true dependency on that register through another 205 // operand - we'll have to wait for it to be available regardless. 206 if (!HadTrueDependency && shouldBreakDependence(MI, i, Pref)) 207 UndefReads.push_back(std::make_pair(MI, i)); 208 } 209 } 210 211 // The code below allows the target to create a new instruction to break the 212 // dependence. That opposes the goal of minimizing size, so bail out now. 213 if (MF->getFunction().hasMinSize()) 214 return; 215 216 for (unsigned i = 0, 217 e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); 218 i != e; ++i) { 219 MachineOperand &MO = MI->getOperand(i); 220 if (!MO.isReg() || !MO.getReg()) 221 continue; 222 if (MO.isUse()) 223 continue; 224 // Check clearance before partial register updates. 225 unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI); 226 if (Pref && shouldBreakDependence(MI, i, Pref)) 227 TII->breakPartialRegDependency(*MI, i, TRI); 228 } 229 } 230 231 void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { 232 if (UndefReads.empty()) 233 return; 234 235 // The code below allows the target to create a new instruction to break the 236 // dependence. That opposes the goal of minimizing size, so bail out now. 237 if (MF->getFunction().hasMinSize()) 238 return; 239 240 // Collect this block's live out register units. 241 LiveRegSet.init(*TRI); 242 // We do not need to care about pristine registers as they are just preserved 243 // but not actually used in the function. 244 LiveRegSet.addLiveOutsNoPristines(*MBB); 245 246 MachineInstr *UndefMI = UndefReads.back().first; 247 unsigned OpIdx = UndefReads.back().second; 248 249 for (MachineInstr &I : llvm::reverse(*MBB)) { 250 // Update liveness, including the current instruction's defs. 251 LiveRegSet.stepBackward(I); 252 253 if (UndefMI == &I) { 254 if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) 255 TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI); 256 257 UndefReads.pop_back(); 258 if (UndefReads.empty()) 259 return; 260 261 UndefMI = UndefReads.back().first; 262 OpIdx = UndefReads.back().second; 263 } 264 } 265 } 266 267 void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) { 268 UndefReads.clear(); 269 // If this block is not done, it makes little sense to make any decisions 270 // based on clearance information. We need to make a second pass anyway, 271 // and by then we'll have better information, so we can avoid doing the work 272 // to try and break dependencies now. 273 for (MachineInstr &MI : *MBB) { 274 if (!MI.isDebugInstr()) 275 processDefs(&MI); 276 } 277 processUndefReads(MBB); 278 } 279 280 bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) { 281 if (skipFunction(mf.getFunction())) 282 return false; 283 MF = &mf; 284 TII = MF->getSubtarget().getInstrInfo(); 285 TRI = MF->getSubtarget().getRegisterInfo(); 286 RDA = &getAnalysis<ReachingDefAnalysis>(); 287 288 RegClassInfo.runOnMachineFunction(mf); 289 290 LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n"); 291 292 // Traverse the basic blocks. 293 for (MachineBasicBlock &MBB : mf) { 294 processBasicBlock(&MBB); 295 } 296 297 return false; 298 } 299