//===- PPCMachineScheduler.cpp - MI Scheduler for PowerPC ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PPCMachineScheduler.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"

using namespace llvm;

static cl::opt<bool>
    DisableAddiLoadHeuristic("disable-ppc-sched-addi-load",
                             cl::desc("Disable scheduling addi instruction "
                                      "before load for ppc"),
                             cl::Hidden);
static cl::opt<bool>
    EnableAddiHeuristic("ppc-postra-bias-addi",
                        cl::desc("Enable scheduling addi instruction as early "
                                 "as possible post ra"),
                        cl::Hidden, cl::init(true));

static bool isADDIInstr(const GenericScheduler::SchedCandidate &Cand) {
  return Cand.SU->getInstr()->getOpcode() == PPC::ADDI ||
         Cand.SU->getInstr()->getOpcode() == PPC::ADDI8;
}

bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
                                                  SchedCandidate &TryCand,
                                                  SchedBoundary &Zone) const {
  if (DisableAddiLoadHeuristic)
    return false;

  SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand;
  SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand;
  if (isADDIInstr(FirstCand) && SecondCand.SU->getInstr()->mayLoad()) {
    TryCand.Reason = Stall;
    return true;
  }
  if (FirstCand.SU->getInstr()->mayLoad() && isADDIInstr(SecondCand)) {
    TryCand.Reason = NoCand;
    return true;
  }

  return false;
}
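
// Illustrative (hypothetical) picture of the heuristic above: suppose the
// boundary must order the pair
//   %0:g8rc = ADDI8 %1, 64   ; cheap integer add
//   %2:g8rc = LD 0, %3       ; load with multi-cycle latency
// At the top boundary an ADDI candidate facing a load is marked Stall and
// wins; a load candidate facing an ADDI is marked NoCand and loses. At the
// bottom boundary the roles of Cand/TryCand are swapped, so in both
// directions the ADDI ends up placed before the load in program order.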
bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                         SchedCandidate &TryCand,
                                         SchedBoundary *Zone) const {
  // From GenericScheduler::tryCandidate

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Bias PhysReg Defs and copies to their uses and defined registers,
  // respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // We only compare a subset of features when comparing nodes between the
  // Top and Bottom boundary. Some properties are simply incomparable; in many
  // other instances we should only override the other boundary if something
  // is a clear good pick on one boundary. Skip heuristics that are more
  // "tie-breaking" in nature.
  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // For loops that are acyclic path limited, aggressively schedule for
    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
    // heuristics to take precedence.
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
        tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Avoid serializing long latency dependence chains.
    // For acyclic path limited loops, latency was already checked above.
    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
    }
  }

  // GenericScheduler::tryCandidate end

  // Apply the PowerPC-specific heuristic only when TryCand isn't selected, or
  // is selected merely by node order.
  if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
    return true;

  // There are some benefits to scheduling the ADDI before the load to hide
  // the latency, as RA may create a true dependency between the load and the
  // ADDI.
  if (SameBoundary) {
    if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
      return TryCand.Reason != NoCand;
  }

  return TryCand.Reason != NoCand;
}

bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
                                               SchedCandidate &TryCand) const {
  if (!EnableAddiHeuristic)
    return false;

  if (isADDIInstr(TryCand) && !isADDIInstr(Cand)) {
    TryCand.Reason = Stall;
    return true;
  }
  return false;
}
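
// Illustrative (hypothetical) scenario for the bias above: in a loop body
// such as
//   xvmaddadp vs0, vs1, vs2   ; long-latency vector op occupying the units
//   addi r5, r5, 16           ; post-increments the loop induction variable
// pulling the addi ahead keeps the induction variable update (and anything
// depending on it, such as the loop branch or the next iteration's address
// computation) from waiting behind the saturated vector pipeline.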
bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand) {
  // From PostGenericScheduler::tryCandidate

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Prioritize instructions that read unbuffered resources by stall cycles.
  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    return TryCand.Reason != NoCand;

  // Keep clustered nodes together.
  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
                 Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // Avoid critical resource consumption and balance the schedule.
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return TryCand.Reason != NoCand;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources, TryCand, Cand,
                 ResourceDemand))
    return TryCand.Reason != NoCand;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top))
    return TryCand.Reason != NoCand;

  // Fall through to original instruction order.
  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
    TryCand.Reason = NodeOrder;

  // PostGenericScheduler::tryCandidate end

  // Apply the PowerPC post-RA-specific heuristic only when TryCand isn't
  // selected, or is selected merely by node order.
  if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
    return true;

  // There are some benefits to scheduling the ADDI as early as possible post
  // RA to avoid being stalled by vector instructions which take up all the
  // hardware units. ADDI is also usually used to post-increment the loop
  // induction variable, which matters for performance.
  if (biasAddiCandidate(Cand, TryCand))
    return TryCand.Reason != NoCand;

  return TryCand.Reason != NoCand;
}

void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
  // Custom PPC PostRA specific behavior here.
  PostGenericScheduler::enterMBB(MBB);
}

void PPCPostRASchedStrategy::leaveMBB() {
  // Custom PPC PostRA specific behavior here.
  PostGenericScheduler::leaveMBB();
}

void PPCPostRASchedStrategy::initialize(ScheduleDAGMI *Dag) {
  // Custom PPC PostRA specific initialization here.
  PostGenericScheduler::initialize(Dag);
}

SUnit *PPCPostRASchedStrategy::pickNode(bool &IsTopNode) {
  // Custom PPC PostRA specific scheduling here.
  return PostGenericScheduler::pickNode(IsTopNode);
}
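
// A minimal sketch (not part of this file) of how these strategies would be
// wired into the scheduler via the standard target hooks, assuming the usual
// MachineSchedContext/ScheduleDAGMI APIs; any subtarget predicate guarding
// the pre-RA strategy is elided here:
//
//   ScheduleDAGInstrs *
//   PPCTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
//     return new ScheduleDAGMILive(
//         C, std::make_unique<PPCPreRASchedStrategy>(C));
//   }
//
//   ScheduleDAGInstrs *
//   PPCTargetMachine::createPostMachineScheduler(
//       MachineSchedContext *C) const {
//     return new ScheduleDAGMI(C, std::make_unique<PPCPostRASchedStrategy>(C),
//                              /*RemoveKillFlags=*/true);
//   }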