1 //==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file Break False Dependency pass. 10 /// 11 /// Some instructions have false dependencies which cause unnecessary stalls. 12 /// For example, instructions may write part of a register and implicitly 13 /// need to read the other parts of the register. This may cause unwanted 14 /// stalls preventing otherwise unrelated instructions from executing in 15 /// parallel in an out-of-order CPU. 16 /// This pass is aimed at identifying and avoiding these dependencies. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "llvm/CodeGen/LivePhysRegs.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineRegisterInfo.h" 23 #include "llvm/CodeGen/ReachingDefAnalysis.h" 24 #include "llvm/CodeGen/RegisterClassInfo.h" 25 #include "llvm/CodeGen/TargetInstrInfo.h" 26 #include "llvm/InitializePasses.h" 27 #include "llvm/Support/Debug.h" 28 29 using namespace llvm; 30 31 namespace llvm { 32 33 class BreakFalseDeps : public MachineFunctionPass { 34 private: 35 MachineFunction *MF; 36 const TargetInstrInfo *TII; 37 const TargetRegisterInfo *TRI; 38 RegisterClassInfo RegClassInfo; 39 40 /// List of undefined register reads in this block in forward order. 41 std::vector<std::pair<MachineInstr *, unsigned>> UndefReads; 42 43 /// Storage for register unit liveness. 44 LivePhysRegs LiveRegSet; 45 46 ReachingDefAnalysis *RDA; 47 48 public: 49 static char ID; // Pass identification, replacement for typeid 50 51 BreakFalseDeps() : MachineFunctionPass(ID) { 52 initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry()); 53 } 54 55 void getAnalysisUsage(AnalysisUsage &AU) const override { 56 AU.setPreservesAll(); 57 AU.addRequired<ReachingDefAnalysis>(); 58 MachineFunctionPass::getAnalysisUsage(AU); 59 } 60 61 bool runOnMachineFunction(MachineFunction &MF) override; 62 63 MachineFunctionProperties getRequiredProperties() const override { 64 return MachineFunctionProperties().set( 65 MachineFunctionProperties::Property::NoVRegs); 66 } 67 68 private: 69 /// Process he given basic block. 70 void processBasicBlock(MachineBasicBlock *MBB); 71 72 /// Update def-ages for registers defined by MI. 73 /// Also break dependencies on partial defs and undef uses. 74 void processDefs(MachineInstr *MI); 75 76 /// Helps avoid false dependencies on undef registers by updating the 77 /// machine instructions' undef operand to use a register that the instruction 78 /// is truly dependent on, or use a register with clearance higher than Pref. 79 /// Returns true if it was able to find a true dependency, thus not requiring 80 /// a dependency breaking instruction regardless of clearance. 81 bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, 82 unsigned Pref); 83 84 /// Return true to if it makes sense to break dependence on a partial 85 /// def or undef use. 86 bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref); 87 88 /// Break false dependencies on undefined register reads. 89 /// Walk the block backward computing precise liveness. This is expensive, so 90 /// we only do it on demand. Note that the occurrence of undefined register 91 /// reads that should be broken is very rare, but when they occur we may have 92 /// many in a single block. 93 void processUndefReads(MachineBasicBlock *); 94 }; 95 96 } // namespace llvm 97 98 #define DEBUG_TYPE "break-false-deps" 99 100 char BreakFalseDeps::ID = 0; 101 INITIALIZE_PASS_BEGIN(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false) 102 INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) 103 INITIALIZE_PASS_END(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false) 104 105 FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); } 106 107 bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, 108 unsigned Pref) { 109 110 // We can't change tied operands. 111 if (MI->isRegTiedToDefOperand(OpIdx)) 112 return false; 113 114 MachineOperand &MO = MI->getOperand(OpIdx); 115 assert(MO.isUndef() && "Expected undef machine operand"); 116 117 // We can't change registers that aren't renamable. 118 if (!MO.isRenamable()) 119 return false; 120 121 MCRegister OriginalReg = MO.getReg().asMCReg(); 122 123 // Update only undef operands that have reg units that are mapped to one root. 124 for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) { 125 unsigned NumRoots = 0; 126 for (MCRegUnitRootIterator Root(*Unit, TRI); Root.isValid(); ++Root) { 127 NumRoots++; 128 if (NumRoots > 1) 129 return false; 130 } 131 } 132 133 // Get the undef operand's register class 134 const TargetRegisterClass *OpRC = 135 TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF); 136 137 // If the instruction has a true dependency, we can hide the false depdency 138 // behind it. 139 for (MachineOperand &CurrMO : MI->operands()) { 140 if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() || 141 !OpRC->contains(CurrMO.getReg())) 142 continue; 143 // We found a true dependency - replace the undef register with the true 144 // dependency. 145 MO.setReg(CurrMO.getReg()); 146 return true; 147 } 148 149 // Go over all registers in the register class and find the register with 150 // max clearance or clearance higher than Pref. 151 unsigned MaxClearance = 0; 152 unsigned MaxClearanceReg = OriginalReg; 153 ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC); 154 for (MCPhysReg Reg : Order) { 155 unsigned Clearance = RDA->getClearance(MI, Reg); 156 if (Clearance <= MaxClearance) 157 continue; 158 MaxClearance = Clearance; 159 MaxClearanceReg = Reg; 160 161 if (MaxClearance > Pref) 162 break; 163 } 164 165 // Update the operand if we found a register with better clearance. 166 if (MaxClearanceReg != OriginalReg) 167 MO.setReg(MaxClearanceReg); 168 169 return false; 170 } 171 172 bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, 173 unsigned Pref) { 174 MCRegister Reg = MI->getOperand(OpIdx).getReg().asMCReg(); 175 unsigned Clearance = RDA->getClearance(MI, Reg); 176 LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); 177 178 if (Pref > Clearance) { 179 LLVM_DEBUG(dbgs() << ": Break dependency.\n"); 180 return true; 181 } 182 LLVM_DEBUG(dbgs() << ": OK .\n"); 183 return false; 184 } 185 186 void BreakFalseDeps::processDefs(MachineInstr *MI) { 187 assert(!MI->isDebugInstr() && "Won't process debug values"); 188 189 const MCInstrDesc &MCID = MI->getDesc(); 190 191 // Break dependence on undef uses. Do this before updating LiveRegs below. 192 // This can remove a false dependence with no additional instructions. 193 for (unsigned i = MCID.getNumDefs(), e = MCID.getNumOperands(); i != e; ++i) { 194 MachineOperand &MO = MI->getOperand(i); 195 if (!MO.isReg() || !MO.getReg() || !MO.isUse() || !MO.isUndef()) 196 continue; 197 198 unsigned Pref = TII->getUndefRegClearance(*MI, i, TRI); 199 if (Pref) { 200 bool HadTrueDependency = pickBestRegisterForUndef(MI, i, Pref); 201 // We don't need to bother trying to break a dependency if this 202 // instruction has a true dependency on that register through another 203 // operand - we'll have to wait for it to be available regardless. 204 if (!HadTrueDependency && shouldBreakDependence(MI, i, Pref)) 205 UndefReads.push_back(std::make_pair(MI, i)); 206 } 207 } 208 209 // The code below allows the target to create a new instruction to break the 210 // dependence. That opposes the goal of minimizing size, so bail out now. 211 if (MF->getFunction().hasMinSize()) 212 return; 213 214 for (unsigned i = 0, 215 e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); 216 i != e; ++i) { 217 MachineOperand &MO = MI->getOperand(i); 218 if (!MO.isReg() || !MO.getReg()) 219 continue; 220 if (MO.isUse()) 221 continue; 222 // Check clearance before partial register updates. 223 unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI); 224 if (Pref && shouldBreakDependence(MI, i, Pref)) 225 TII->breakPartialRegDependency(*MI, i, TRI); 226 } 227 } 228 229 void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { 230 if (UndefReads.empty()) 231 return; 232 233 // The code below allows the target to create a new instruction to break the 234 // dependence. That opposes the goal of minimizing size, so bail out now. 235 if (MF->getFunction().hasMinSize()) 236 return; 237 238 // Collect this block's live out register units. 239 LiveRegSet.init(*TRI); 240 // We do not need to care about pristine registers as they are just preserved 241 // but not actually used in the function. 242 LiveRegSet.addLiveOutsNoPristines(*MBB); 243 244 MachineInstr *UndefMI = UndefReads.back().first; 245 unsigned OpIdx = UndefReads.back().second; 246 247 for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) { 248 // Update liveness, including the current instruction's defs. 249 LiveRegSet.stepBackward(I); 250 251 if (UndefMI == &I) { 252 if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) 253 TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI); 254 255 UndefReads.pop_back(); 256 if (UndefReads.empty()) 257 return; 258 259 UndefMI = UndefReads.back().first; 260 OpIdx = UndefReads.back().second; 261 } 262 } 263 } 264 265 void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) { 266 UndefReads.clear(); 267 // If this block is not done, it makes little sense to make any decisions 268 // based on clearance information. We need to make a second pass anyway, 269 // and by then we'll have better information, so we can avoid doing the work 270 // to try and break dependencies now. 271 for (MachineInstr &MI : *MBB) { 272 if (!MI.isDebugInstr()) 273 processDefs(&MI); 274 } 275 processUndefReads(MBB); 276 } 277 278 bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) { 279 if (skipFunction(mf.getFunction())) 280 return false; 281 MF = &mf; 282 TII = MF->getSubtarget().getInstrInfo(); 283 TRI = MF->getSubtarget().getRegisterInfo(); 284 RDA = &getAnalysis<ReachingDefAnalysis>(); 285 286 RegClassInfo.runOnMachineFunction(mf); 287 288 LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n"); 289 290 // Traverse the basic blocks. 291 for (MachineBasicBlock &MBB : mf) { 292 processBasicBlock(&MBB); 293 } 294 295 return false; 296 } 297