1 //===---------------------------- GCNILPSched.cpp - -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/CodeGen/ScheduleDAG.h" 14 15 using namespace llvm; 16 17 #define DEBUG_TYPE "machine-scheduler" 18 19 namespace { 20 21 class GCNILPScheduler { 22 struct Candidate : ilist_node<Candidate> { 23 SUnit *SU; 24 25 Candidate(SUnit *SU_) 26 : SU(SU_) {} 27 }; 28 29 SpecificBumpPtrAllocator<Candidate> Alloc; 30 typedef simple_ilist<Candidate> Queue; 31 Queue PendingQueue; 32 Queue AvailQueue; 33 unsigned CurQueueId = 0; 34 35 std::vector<unsigned> SUNumbers; 36 37 /// CurCycle - The current scheduler state corresponds to this cycle. 38 unsigned CurCycle = 0; 39 40 unsigned getNodePriority(const SUnit *SU) const; 41 42 const SUnit *pickBest(const SUnit *left, const SUnit *right); 43 Candidate* pickCandidate(); 44 45 void releasePending(); 46 void advanceToCycle(unsigned NextCycle); 47 void releasePredecessors(const SUnit* SU); 48 49 public: 50 std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots, 51 const ScheduleDAG &DAG); 52 }; 53 } // namespace 54 55 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 56 /// Smaller number is the higher priority. 57 static unsigned 58 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 59 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 60 if (SethiUllmanNumber != 0) 61 return SethiUllmanNumber; 62 63 unsigned Extra = 0; 64 for (const SDep &Pred : SU->Preds) { 65 if (Pred.isCtrl()) continue; // ignore chain preds 66 SUnit *PredSU = Pred.getSUnit(); 67 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 68 if (PredSethiUllman > SethiUllmanNumber) { 69 SethiUllmanNumber = PredSethiUllman; 70 Extra = 0; 71 } 72 else if (PredSethiUllman == SethiUllmanNumber) 73 ++Extra; 74 } 75 76 SethiUllmanNumber += Extra; 77 78 if (SethiUllmanNumber == 0) 79 SethiUllmanNumber = 1; 80 81 return SethiUllmanNumber; 82 } 83 84 // Lower priority means schedule further down. For bottom-up scheduling, lower 85 // priority SUs are scheduled before higher priority SUs. 86 unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const { 87 assert(SU->NodeNum < SUNumbers.size()); 88 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 89 // If SU does not have a register use, i.e. it doesn't produce a value 90 // that would be consumed (e.g. store), then it terminates a chain of 91 // computation. Give it a large SethiUllman number so it will be 92 // scheduled right before its predecessors that it doesn't lengthen 93 // their live ranges. 94 return 0xffff; 95 96 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 97 // If SU does not have a register def, schedule it close to its uses 98 // because it does not lengthen any live ranges. 99 return 0; 100 101 return SUNumbers[SU->NodeNum]; 102 } 103 104 /// closestSucc - Returns the scheduled cycle of the successor which is 105 /// closest to the current cycle. 106 static unsigned closestSucc(const SUnit *SU) { 107 unsigned MaxHeight = 0; 108 for (const SDep &Succ : SU->Succs) { 109 if (Succ.isCtrl()) continue; // ignore chain succs 110 unsigned Height = Succ.getSUnit()->getHeight(); 111 // If there are bunch of CopyToRegs stacked up, they should be considered 112 // to be at the same position. 113 if (Height > MaxHeight) 114 MaxHeight = Height; 115 } 116 return MaxHeight; 117 } 118 119 /// calcMaxScratches - Returns an cost estimate of the worse case requirement 120 /// for scratch registers, i.e. number of data dependencies. 121 static unsigned calcMaxScratches(const SUnit *SU) { 122 unsigned Scratches = 0; 123 for (const SDep &Pred : SU->Preds) { 124 if (Pred.isCtrl()) continue; // ignore chain preds 125 Scratches++; 126 } 127 return Scratches; 128 } 129 130 // Return -1 if left has higher priority, 1 if right has higher priority. 131 // Return 0 if latency-based priority is equivalent. 132 static int BUCompareLatency(const SUnit *left, const SUnit *right) { 133 // Scheduling an instruction that uses a VReg whose postincrement has not yet 134 // been scheduled will induce a copy. Model this as an extra cycle of latency. 135 int LHeight = (int)left->getHeight(); 136 int RHeight = (int)right->getHeight(); 137 138 // If either node is scheduling for latency, sort them by height/depth 139 // and latency. 140 141 // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer 142 // is enabled, grouping instructions by cycle, then its height is already 143 // covered so only its depth matters. We also reach this point if both stall 144 // but have the same height. 145 if (LHeight != RHeight) 146 return LHeight > RHeight ? 1 : -1; 147 148 int LDepth = left->getDepth(); 149 int RDepth = right->getDepth(); 150 if (LDepth != RDepth) { 151 LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum 152 << ") depth " << LDepth << " vs SU (" << right->NodeNum 153 << ") depth " << RDepth << "\n"); 154 return LDepth < RDepth ? 1 : -1; 155 } 156 if (left->Latency != right->Latency) 157 return left->Latency > right->Latency ? 1 : -1; 158 159 return 0; 160 } 161 162 const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right) 163 { 164 // TODO: add register pressure lowering checks 165 166 bool const DisableSchedCriticalPath = false; 167 int MaxReorderWindow = 6; 168 if (!DisableSchedCriticalPath) { 169 int spread = (int)left->getDepth() - (int)right->getDepth(); 170 if (std::abs(spread) > MaxReorderWindow) { 171 LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " 172 << left->getDepth() << " != SU(" << right->NodeNum 173 << "): " << right->getDepth() << "\n"); 174 return left->getDepth() < right->getDepth() ? right : left; 175 } 176 } 177 178 bool const DisableSchedHeight = false; 179 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { 180 int spread = (int)left->getHeight() - (int)right->getHeight(); 181 if (std::abs(spread) > MaxReorderWindow) 182 return left->getHeight() > right->getHeight() ? right : left; 183 } 184 185 // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. 186 unsigned LPriority = getNodePriority(left); 187 unsigned RPriority = getNodePriority(right); 188 189 if (LPriority != RPriority) 190 return LPriority > RPriority ? right : left; 191 192 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 193 // e.g. 194 // t1 = op t2, c1 195 // t3 = op t4, c2 196 // 197 // and the following instructions are both ready. 198 // t2 = op c3 199 // t4 = op c4 200 // 201 // Then schedule t2 = op first. 202 // i.e. 203 // t4 = op c4 204 // t2 = op c3 205 // t1 = op t2, c1 206 // t3 = op t4, c2 207 // 208 // This creates more short live intervals. 209 unsigned LDist = closestSucc(left); 210 unsigned RDist = closestSucc(right); 211 if (LDist != RDist) 212 return LDist < RDist ? right : left; 213 214 // How many registers becomes live when the node is scheduled. 215 unsigned LScratch = calcMaxScratches(left); 216 unsigned RScratch = calcMaxScratches(right); 217 if (LScratch != RScratch) 218 return LScratch > RScratch ? right : left; 219 220 bool const DisableSchedCycles = false; 221 if (!DisableSchedCycles) { 222 int result = BUCompareLatency(left, right); 223 if (result != 0) 224 return result > 0 ? right : left; 225 return left; 226 } 227 else { 228 if (left->getHeight() != right->getHeight()) 229 return (left->getHeight() > right->getHeight()) ? right : left; 230 231 if (left->getDepth() != right->getDepth()) 232 return (left->getDepth() < right->getDepth()) ? right : left; 233 } 234 235 assert(left->NodeQueueId && right->NodeQueueId && 236 "NodeQueueId cannot be zero"); 237 return (left->NodeQueueId > right->NodeQueueId) ? right : left; 238 } 239 240 GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() { 241 if (AvailQueue.empty()) 242 return nullptr; 243 auto Best = AvailQueue.begin(); 244 for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) { 245 auto NewBestSU = pickBest(Best->SU, I->SU); 246 if (NewBestSU != Best->SU) { 247 assert(NewBestSU == I->SU); 248 Best = I; 249 } 250 } 251 return &*Best; 252 } 253 254 void GCNILPScheduler::releasePending() { 255 // Check to see if any of the pending instructions are ready to issue. If 256 // so, add them to the available queue. 257 for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) { 258 auto &C = *I++; 259 if (C.SU->getHeight() <= CurCycle) { 260 PendingQueue.remove(C); 261 AvailQueue.push_back(C); 262 C.SU->NodeQueueId = CurQueueId++; 263 } 264 } 265 } 266 267 /// Move the scheduler state forward by the specified number of Cycles. 268 void GCNILPScheduler::advanceToCycle(unsigned NextCycle) { 269 if (NextCycle <= CurCycle) 270 return; 271 CurCycle = NextCycle; 272 releasePending(); 273 } 274 275 void GCNILPScheduler::releasePredecessors(const SUnit* SU) { 276 for (const auto &PredEdge : SU->Preds) { 277 auto PredSU = PredEdge.getSUnit(); 278 if (PredEdge.isWeak()) 279 continue; 280 assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0); 281 282 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency()); 283 284 if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0) 285 PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU)); 286 } 287 } 288 289 std::vector<const SUnit*> 290 GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots, 291 const ScheduleDAG &DAG) { 292 auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits; 293 294 std::vector<SUnit> SUSavedCopy; 295 SUSavedCopy.resize(SUnits.size()); 296 297 // we cannot save only those fields we touch: some of them are private 298 // so save units verbatim: this assumes SUnit should have value semantics 299 for (const SUnit &SU : SUnits) 300 SUSavedCopy[SU.NodeNum] = SU; 301 302 SUNumbers.assign(SUnits.size(), 0); 303 for (const SUnit &SU : SUnits) 304 CalcNodeSethiUllmanNumber(&SU, SUNumbers); 305 306 for (auto SU : BotRoots) { 307 AvailQueue.push_back( 308 *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU))); 309 } 310 releasePredecessors(&DAG.ExitSU); 311 312 std::vector<const SUnit*> Schedule; 313 Schedule.reserve(SUnits.size()); 314 while (true) { 315 if (AvailQueue.empty() && !PendingQueue.empty()) { 316 auto EarliestSU = std::min_element( 317 PendingQueue.begin(), PendingQueue.end(), 318 [=](const Candidate& C1, const Candidate& C2) { 319 return C1.SU->getHeight() < C2.SU->getHeight(); 320 })->SU; 321 advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight())); 322 } 323 if (AvailQueue.empty()) 324 break; 325 326 LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n" 327 "Ready queue:"; 328 for (auto &C 329 : AvailQueue) dbgs() 330 << ' ' << C.SU->NodeNum; 331 dbgs() << '\n';); 332 333 auto C = pickCandidate(); 334 assert(C); 335 AvailQueue.remove(*C); 336 auto SU = C->SU; 337 LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU)); 338 339 advanceToCycle(SU->getHeight()); 340 341 releasePredecessors(SU); 342 Schedule.push_back(SU); 343 SU->isScheduled = true; 344 } 345 assert(SUnits.size() == Schedule.size()); 346 347 std::reverse(Schedule.begin(), Schedule.end()); 348 349 // restore units 350 for (auto &SU : SUnits) 351 SU = SUSavedCopy[SU.NodeNum]; 352 353 return Schedule; 354 } 355 356 namespace llvm { 357 std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots, 358 const ScheduleDAG &DAG) { 359 GCNILPScheduler S; 360 return S.schedule(BotRoots, DAG); 361 } 362 } 363