xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNILPSched.cpp (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 //===---------------------------- GCNILPSched.cpp - -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/ScheduleDAG.h"
14 
15 using namespace llvm;
16 
17 #define DEBUG_TYPE "machine-scheduler"
18 
19 namespace {
20 
21 class GCNILPScheduler {
22   struct Candidate : ilist_node<Candidate> {
23     SUnit *SU;
24 
25     Candidate(SUnit *SU_)
26       : SU(SU_) {}
27   };
28 
29   SpecificBumpPtrAllocator<Candidate> Alloc;
30   typedef simple_ilist<Candidate> Queue;
31   Queue PendingQueue;
32   Queue AvailQueue;
33   unsigned CurQueueId = 0;
34 
35   std::vector<unsigned> SUNumbers;
36 
37   /// CurCycle - The current scheduler state corresponds to this cycle.
38   unsigned CurCycle = 0;
39 
40   unsigned getNodePriority(const SUnit *SU) const;
41 
42   const SUnit *pickBest(const SUnit *left, const SUnit *right);
43   Candidate* pickCandidate();
44 
45   void releasePending();
46   void advanceToCycle(unsigned NextCycle);
47   void releasePredecessors(const SUnit* SU);
48 
49 public:
50   std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
51                                      const ScheduleDAG &DAG);
52 };
53 } // namespace
54 
55 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
56 /// Smaller number is the higher priority.
57 static unsigned
58 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
59   unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
60   if (SethiUllmanNumber != 0)
61     return SethiUllmanNumber;
62 
63   unsigned Extra = 0;
64   for (const SDep &Pred : SU->Preds) {
65     if (Pred.isCtrl()) continue;  // ignore chain preds
66     SUnit *PredSU = Pred.getSUnit();
67     unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
68     if (PredSethiUllman > SethiUllmanNumber) {
69       SethiUllmanNumber = PredSethiUllman;
70       Extra = 0;
71     }
72     else if (PredSethiUllman == SethiUllmanNumber)
73       ++Extra;
74   }
75 
76   SethiUllmanNumber += Extra;
77 
78   if (SethiUllmanNumber == 0)
79     SethiUllmanNumber = 1;
80 
81   return SethiUllmanNumber;
82 }
83 
84 // Lower priority means schedule further down. For bottom-up scheduling, lower
85 // priority SUs are scheduled before higher priority SUs.
86 unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
87   assert(SU->NodeNum < SUNumbers.size());
88   if (SU->NumSuccs == 0 && SU->NumPreds != 0)
89     // If SU does not have a register use, i.e. it doesn't produce a value
90     // that would be consumed (e.g. store), then it terminates a chain of
91     // computation.  Give it a large SethiUllman number so it will be
92     // scheduled right before its predecessors that it doesn't lengthen
93     // their live ranges.
94     return 0xffff;
95 
96   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
97     // If SU does not have a register def, schedule it close to its uses
98     // because it does not lengthen any live ranges.
99     return 0;
100 
101   return SUNumbers[SU->NodeNum];
102 }
103 
104 /// closestSucc - Returns the scheduled cycle of the successor which is
105 /// closest to the current cycle.
106 static unsigned closestSucc(const SUnit *SU) {
107   unsigned MaxHeight = 0;
108   for (const SDep &Succ : SU->Succs) {
109     if (Succ.isCtrl()) continue;  // ignore chain succs
110     unsigned Height = Succ.getSUnit()->getHeight();
111     // If there are bunch of CopyToRegs stacked up, they should be considered
112     // to be at the same position.
113     if (Height > MaxHeight)
114       MaxHeight = Height;
115   }
116   return MaxHeight;
117 }
118 
119 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
120 /// for scratch registers, i.e. number of data dependencies.
121 static unsigned calcMaxScratches(const SUnit *SU) {
122   unsigned Scratches = 0;
123   for (const SDep &Pred : SU->Preds) {
124     if (Pred.isCtrl()) continue;  // ignore chain preds
125     Scratches++;
126   }
127   return Scratches;
128 }
129 
130 // Return -1 if left has higher priority, 1 if right has higher priority.
131 // Return 0 if latency-based priority is equivalent.
132 static int BUCompareLatency(const SUnit *left, const SUnit *right) {
133   // Scheduling an instruction that uses a VReg whose postincrement has not yet
134   // been scheduled will induce a copy. Model this as an extra cycle of latency.
135   int LHeight = (int)left->getHeight();
136   int RHeight = (int)right->getHeight();
137 
138   // If either node is scheduling for latency, sort them by height/depth
139   // and latency.
140 
141   // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
142   // is enabled, grouping instructions by cycle, then its height is already
143   // covered so only its depth matters. We also reach this point if both stall
144   // but have the same height.
145   if (LHeight != RHeight)
146     return LHeight > RHeight ? 1 : -1;
147 
148   int LDepth = left->getDepth();
149   int RDepth = right->getDepth();
150   if (LDepth != RDepth) {
151     LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
152                       << ") depth " << LDepth << " vs SU (" << right->NodeNum
153                       << ") depth " << RDepth << "\n");
154     return LDepth < RDepth ? 1 : -1;
155   }
156   if (left->Latency != right->Latency)
157     return left->Latency > right->Latency ? 1 : -1;
158 
159   return 0;
160 }
161 
162 const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
163 {
164   // TODO: add register pressure lowering checks
165 
166   bool const DisableSchedCriticalPath = false;
167   int MaxReorderWindow = 6;
168   if (!DisableSchedCriticalPath) {
169     int spread = (int)left->getDepth() - (int)right->getDepth();
170     if (std::abs(spread) > MaxReorderWindow) {
171       LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
172                         << left->getDepth() << " != SU(" << right->NodeNum
173                         << "): " << right->getDepth() << "\n");
174       return left->getDepth() < right->getDepth() ? right : left;
175     }
176   }
177 
178   bool const DisableSchedHeight = false;
179   if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
180     int spread = (int)left->getHeight() - (int)right->getHeight();
181     if (std::abs(spread) > MaxReorderWindow)
182       return left->getHeight() > right->getHeight() ? right : left;
183   }
184 
185   // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
186   unsigned LPriority = getNodePriority(left);
187   unsigned RPriority = getNodePriority(right);
188 
189   if (LPriority != RPriority)
190     return LPriority > RPriority ? right : left;
191 
192   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
193   // e.g.
194   // t1 = op t2, c1
195   // t3 = op t4, c2
196   //
197   // and the following instructions are both ready.
198   // t2 = op c3
199   // t4 = op c4
200   //
201   // Then schedule t2 = op first.
202   // i.e.
203   // t4 = op c4
204   // t2 = op c3
205   // t1 = op t2, c1
206   // t3 = op t4, c2
207   //
208   // This creates more short live intervals.
209   unsigned LDist = closestSucc(left);
210   unsigned RDist = closestSucc(right);
211   if (LDist != RDist)
212     return LDist < RDist ? right : left;
213 
214   // How many registers becomes live when the node is scheduled.
215   unsigned LScratch = calcMaxScratches(left);
216   unsigned RScratch = calcMaxScratches(right);
217   if (LScratch != RScratch)
218     return LScratch > RScratch ? right : left;
219 
220   bool const DisableSchedCycles = false;
221   if (!DisableSchedCycles) {
222     int result = BUCompareLatency(left, right);
223     if (result != 0)
224       return result > 0 ? right : left;
225     return left;
226   }
227   else {
228     if (left->getHeight() != right->getHeight())
229       return (left->getHeight() > right->getHeight()) ? right : left;
230 
231     if (left->getDepth() != right->getDepth())
232       return (left->getDepth() < right->getDepth()) ? right : left;
233   }
234 
235   assert(left->NodeQueueId && right->NodeQueueId &&
236         "NodeQueueId cannot be zero");
237   return (left->NodeQueueId > right->NodeQueueId) ? right : left;
238 }
239 
240 GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
241   if (AvailQueue.empty())
242     return nullptr;
243   auto Best = AvailQueue.begin();
244   for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
245     auto NewBestSU = pickBest(Best->SU, I->SU);
246     if (NewBestSU != Best->SU) {
247       assert(NewBestSU == I->SU);
248       Best = I;
249     }
250   }
251   return &*Best;
252 }
253 
254 void GCNILPScheduler::releasePending() {
255   // Check to see if any of the pending instructions are ready to issue.  If
256   // so, add them to the available queue.
257   for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
258     auto &C = *I++;
259     if (C.SU->getHeight() <= CurCycle) {
260       PendingQueue.remove(C);
261       AvailQueue.push_back(C);
262       C.SU->NodeQueueId = CurQueueId++;
263     }
264   }
265 }
266 
267 /// Move the scheduler state forward by the specified number of Cycles.
268 void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
269   if (NextCycle <= CurCycle)
270     return;
271   CurCycle = NextCycle;
272   releasePending();
273 }
274 
275 void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
276   for (const auto &PredEdge : SU->Preds) {
277     auto PredSU = PredEdge.getSUnit();
278     if (PredEdge.isWeak())
279       continue;
280     assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
281 
282     PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
283 
284     if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
285       PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
286   }
287 }
288 
289 std::vector<const SUnit*>
290 GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
291                           const ScheduleDAG &DAG) {
292   auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
293 
294   std::vector<SUnit> SUSavedCopy;
295   SUSavedCopy.resize(SUnits.size());
296 
297   // we cannot save only those fields we touch: some of them are private
298   // so save units verbatim: this assumes SUnit should have value semantics
299   for (const SUnit &SU : SUnits)
300     SUSavedCopy[SU.NodeNum] = SU;
301 
302   SUNumbers.assign(SUnits.size(), 0);
303   for (const SUnit &SU : SUnits)
304     CalcNodeSethiUllmanNumber(&SU, SUNumbers);
305 
306   for (auto SU : BotRoots) {
307     AvailQueue.push_back(
308       *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
309   }
310   releasePredecessors(&DAG.ExitSU);
311 
312   std::vector<const SUnit*> Schedule;
313   Schedule.reserve(SUnits.size());
314   while (true) {
315     if (AvailQueue.empty() && !PendingQueue.empty()) {
316       auto EarliestSU = std::min_element(
317         PendingQueue.begin(), PendingQueue.end(),
318         [=](const Candidate& C1, const Candidate& C2) {
319         return C1.SU->getHeight() < C2.SU->getHeight();
320       })->SU;
321       advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
322     }
323     if (AvailQueue.empty())
324       break;
325 
326     LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
327                          "Ready queue:";
328                for (auto &C
329                     : AvailQueue) dbgs()
330                << ' ' << C.SU->NodeNum;
331                dbgs() << '\n';);
332 
333     auto C = pickCandidate();
334     assert(C);
335     AvailQueue.remove(*C);
336     auto SU = C->SU;
337     LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
338 
339     advanceToCycle(SU->getHeight());
340 
341     releasePredecessors(SU);
342     Schedule.push_back(SU);
343     SU->isScheduled = true;
344   }
345   assert(SUnits.size() == Schedule.size());
346 
347   std::reverse(Schedule.begin(), Schedule.end());
348 
349   // restore units
350   for (auto &SU : SUnits)
351     SU = SUSavedCopy[SU.NodeNum];
352 
353   return Schedule;
354 }
355 
356 namespace llvm {
357 std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
358                                               const ScheduleDAG &DAG) {
359   GCNILPScheduler S;
360   return S.schedule(BotRoots, DAG);
361 }
362 }
363