1 //===---------------------------- GCNILPSched.cpp - -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/CodeGen/ScheduleDAG.h" 14 #include "llvm/Support/Debug.h" 15 16 using namespace llvm; 17 18 #define DEBUG_TYPE "machine-scheduler" 19 20 namespace { 21 22 class GCNILPScheduler { 23 struct Candidate : ilist_node<Candidate> { 24 SUnit *SU; 25 26 Candidate(SUnit *SU_) 27 : SU(SU_) {} 28 }; 29 30 SpecificBumpPtrAllocator<Candidate> Alloc; 31 typedef simple_ilist<Candidate> Queue; 32 Queue PendingQueue; 33 Queue AvailQueue; 34 unsigned CurQueueId = 0; 35 36 std::vector<unsigned> SUNumbers; 37 38 /// CurCycle - The current scheduler state corresponds to this cycle. 39 unsigned CurCycle = 0; 40 41 unsigned getNodePriority(const SUnit *SU) const; 42 43 const SUnit *pickBest(const SUnit *left, const SUnit *right); 44 Candidate* pickCandidate(); 45 46 void releasePending(); 47 void advanceToCycle(unsigned NextCycle); 48 void releasePredecessors(const SUnit* SU); 49 50 public: 51 std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots, 52 const ScheduleDAG &DAG); 53 }; 54 } // namespace 55 56 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 57 /// Smaller number is the higher priority. 58 static unsigned 59 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 60 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 61 if (SethiUllmanNumber != 0) 62 return SethiUllmanNumber; 63 64 unsigned Extra = 0; 65 for (const SDep &Pred : SU->Preds) { 66 if (Pred.isCtrl()) continue; // ignore chain preds 67 SUnit *PredSU = Pred.getSUnit(); 68 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 69 if (PredSethiUllman > SethiUllmanNumber) { 70 SethiUllmanNumber = PredSethiUllman; 71 Extra = 0; 72 } 73 else if (PredSethiUllman == SethiUllmanNumber) 74 ++Extra; 75 } 76 77 SethiUllmanNumber += Extra; 78 79 if (SethiUllmanNumber == 0) 80 SethiUllmanNumber = 1; 81 82 return SethiUllmanNumber; 83 } 84 85 // Lower priority means schedule further down. For bottom-up scheduling, lower 86 // priority SUs are scheduled before higher priority SUs. 87 unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const { 88 assert(SU->NodeNum < SUNumbers.size()); 89 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 90 // If SU does not have a register use, i.e. it doesn't produce a value 91 // that would be consumed (e.g. store), then it terminates a chain of 92 // computation. Give it a large SethiUllman number so it will be 93 // scheduled right before its predecessors that it doesn't lengthen 94 // their live ranges. 95 return 0xffff; 96 97 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 98 // If SU does not have a register def, schedule it close to its uses 99 // because it does not lengthen any live ranges. 100 return 0; 101 102 return SUNumbers[SU->NodeNum]; 103 } 104 105 /// closestSucc - Returns the scheduled cycle of the successor which is 106 /// closest to the current cycle. 107 static unsigned closestSucc(const SUnit *SU) { 108 unsigned MaxHeight = 0; 109 for (const SDep &Succ : SU->Succs) { 110 if (Succ.isCtrl()) continue; // ignore chain succs 111 unsigned Height = Succ.getSUnit()->getHeight(); 112 // If there are bunch of CopyToRegs stacked up, they should be considered 113 // to be at the same position. 114 if (Height > MaxHeight) 115 MaxHeight = Height; 116 } 117 return MaxHeight; 118 } 119 120 /// calcMaxScratches - Returns an cost estimate of the worse case requirement 121 /// for scratch registers, i.e. number of data dependencies. 122 static unsigned calcMaxScratches(const SUnit *SU) { 123 unsigned Scratches = 0; 124 for (const SDep &Pred : SU->Preds) { 125 if (Pred.isCtrl()) continue; // ignore chain preds 126 Scratches++; 127 } 128 return Scratches; 129 } 130 131 // Return -1 if left has higher priority, 1 if right has higher priority. 132 // Return 0 if latency-based priority is equivalent. 133 static int BUCompareLatency(const SUnit *left, const SUnit *right) { 134 // Scheduling an instruction that uses a VReg whose postincrement has not yet 135 // been scheduled will induce a copy. Model this as an extra cycle of latency. 136 int LHeight = (int)left->getHeight(); 137 int RHeight = (int)right->getHeight(); 138 139 // If either node is scheduling for latency, sort them by height/depth 140 // and latency. 141 142 // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer 143 // is enabled, grouping instructions by cycle, then its height is already 144 // covered so only its depth matters. We also reach this point if both stall 145 // but have the same height. 146 if (LHeight != RHeight) 147 return LHeight > RHeight ? 1 : -1; 148 149 int LDepth = left->getDepth(); 150 int RDepth = right->getDepth(); 151 if (LDepth != RDepth) { 152 LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum 153 << ") depth " << LDepth << " vs SU (" << right->NodeNum 154 << ") depth " << RDepth << "\n"); 155 return LDepth < RDepth ? 1 : -1; 156 } 157 if (left->Latency != right->Latency) 158 return left->Latency > right->Latency ? 1 : -1; 159 160 return 0; 161 } 162 163 const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right) 164 { 165 // TODO: add register pressure lowering checks 166 167 bool const DisableSchedCriticalPath = false; 168 int MaxReorderWindow = 6; 169 if (!DisableSchedCriticalPath) { 170 int spread = (int)left->getDepth() - (int)right->getDepth(); 171 if (std::abs(spread) > MaxReorderWindow) { 172 LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " 173 << left->getDepth() << " != SU(" << right->NodeNum 174 << "): " << right->getDepth() << "\n"); 175 return left->getDepth() < right->getDepth() ? right : left; 176 } 177 } 178 179 bool const DisableSchedHeight = false; 180 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { 181 int spread = (int)left->getHeight() - (int)right->getHeight(); 182 if (std::abs(spread) > MaxReorderWindow) 183 return left->getHeight() > right->getHeight() ? right : left; 184 } 185 186 // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. 187 unsigned LPriority = getNodePriority(left); 188 unsigned RPriority = getNodePriority(right); 189 190 if (LPriority != RPriority) 191 return LPriority > RPriority ? right : left; 192 193 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 194 // e.g. 195 // t1 = op t2, c1 196 // t3 = op t4, c2 197 // 198 // and the following instructions are both ready. 199 // t2 = op c3 200 // t4 = op c4 201 // 202 // Then schedule t2 = op first. 203 // i.e. 204 // t4 = op c4 205 // t2 = op c3 206 // t1 = op t2, c1 207 // t3 = op t4, c2 208 // 209 // This creates more short live intervals. 210 unsigned LDist = closestSucc(left); 211 unsigned RDist = closestSucc(right); 212 if (LDist != RDist) 213 return LDist < RDist ? right : left; 214 215 // How many registers becomes live when the node is scheduled. 216 unsigned LScratch = calcMaxScratches(left); 217 unsigned RScratch = calcMaxScratches(right); 218 if (LScratch != RScratch) 219 return LScratch > RScratch ? right : left; 220 221 bool const DisableSchedCycles = false; 222 if (!DisableSchedCycles) { 223 int result = BUCompareLatency(left, right); 224 if (result != 0) 225 return result > 0 ? right : left; 226 return left; 227 } 228 else { 229 if (left->getHeight() != right->getHeight()) 230 return (left->getHeight() > right->getHeight()) ? right : left; 231 232 if (left->getDepth() != right->getDepth()) 233 return (left->getDepth() < right->getDepth()) ? right : left; 234 } 235 236 assert(left->NodeQueueId && right->NodeQueueId && 237 "NodeQueueId cannot be zero"); 238 return (left->NodeQueueId > right->NodeQueueId) ? right : left; 239 } 240 241 GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() { 242 if (AvailQueue.empty()) 243 return nullptr; 244 auto Best = AvailQueue.begin(); 245 for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) { 246 auto NewBestSU = pickBest(Best->SU, I->SU); 247 if (NewBestSU != Best->SU) { 248 assert(NewBestSU == I->SU); 249 Best = I; 250 } 251 } 252 return &*Best; 253 } 254 255 void GCNILPScheduler::releasePending() { 256 // Check to see if any of the pending instructions are ready to issue. If 257 // so, add them to the available queue. 258 for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) { 259 auto &C = *I++; 260 if (C.SU->getHeight() <= CurCycle) { 261 PendingQueue.remove(C); 262 AvailQueue.push_back(C); 263 C.SU->NodeQueueId = CurQueueId++; 264 } 265 } 266 } 267 268 /// Move the scheduler state forward by the specified number of Cycles. 269 void GCNILPScheduler::advanceToCycle(unsigned NextCycle) { 270 if (NextCycle <= CurCycle) 271 return; 272 CurCycle = NextCycle; 273 releasePending(); 274 } 275 276 void GCNILPScheduler::releasePredecessors(const SUnit* SU) { 277 for (const auto &PredEdge : SU->Preds) { 278 auto PredSU = PredEdge.getSUnit(); 279 if (PredEdge.isWeak()) 280 continue; 281 assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0); 282 283 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency()); 284 285 if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0) 286 PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU)); 287 } 288 } 289 290 std::vector<const SUnit*> 291 GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots, 292 const ScheduleDAG &DAG) { 293 auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits; 294 295 std::vector<SUnit> SUSavedCopy; 296 SUSavedCopy.resize(SUnits.size()); 297 298 // we cannot save only those fields we touch: some of them are private 299 // so save units verbatim: this assumes SUnit should have value semantics 300 for (const SUnit &SU : SUnits) 301 SUSavedCopy[SU.NodeNum] = SU; 302 303 SUNumbers.assign(SUnits.size(), 0); 304 for (const SUnit &SU : SUnits) 305 CalcNodeSethiUllmanNumber(&SU, SUNumbers); 306 307 for (auto SU : BotRoots) { 308 AvailQueue.push_back( 309 *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU))); 310 } 311 releasePredecessors(&DAG.ExitSU); 312 313 std::vector<const SUnit*> Schedule; 314 Schedule.reserve(SUnits.size()); 315 while (true) { 316 if (AvailQueue.empty() && !PendingQueue.empty()) { 317 auto EarliestSU = std::min_element( 318 PendingQueue.begin(), PendingQueue.end(), 319 [=](const Candidate& C1, const Candidate& C2) { 320 return C1.SU->getHeight() < C2.SU->getHeight(); 321 })->SU; 322 advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight())); 323 } 324 if (AvailQueue.empty()) 325 break; 326 327 LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n" 328 "Ready queue:"; 329 for (auto &C 330 : AvailQueue) dbgs() 331 << ' ' << C.SU->NodeNum; 332 dbgs() << '\n';); 333 334 auto C = pickCandidate(); 335 assert(C); 336 AvailQueue.remove(*C); 337 auto SU = C->SU; 338 LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU)); 339 340 advanceToCycle(SU->getHeight()); 341 342 releasePredecessors(SU); 343 Schedule.push_back(SU); 344 SU->isScheduled = true; 345 } 346 assert(SUnits.size() == Schedule.size()); 347 348 std::reverse(Schedule.begin(), Schedule.end()); 349 350 // restore units 351 for (auto &SU : SUnits) 352 SU = SUSavedCopy[SU.NodeNum]; 353 354 return Schedule; 355 } 356 357 namespace llvm { 358 std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots, 359 const ScheduleDAG &DAG) { 360 GCNILPScheduler S; 361 return S.schedule(BotRoots, DAG); 362 } 363 } 364