xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp (revision 6132212808e8dccedc9e5d85fea4390c2f38059a)
1 //===- GCNMinRegStrategy.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines and imlements the class GCNMinRegScheduler, which
11 /// implements an experimental, simple scheduler whose main goal is to learn
12 /// ways about consuming less possible registers for a region.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/ilist_node.h"
20 #include "llvm/ADT/simple_ilist.h"
21 #include "llvm/CodeGen/ScheduleDAG.h"
22 #include "llvm/Support/Allocator.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26 #include <cstdint>
27 #include <limits>
28 #include <vector>
29 
30 using namespace llvm;
31 
32 #define DEBUG_TYPE "machine-scheduler"
33 
34 namespace {
35 
36 class GCNMinRegScheduler {
37   struct Candidate : ilist_node<Candidate> {
38     const SUnit *SU;
39     int Priority;
40 
41     Candidate(const SUnit *SU_, int Priority_ = 0)
42       : SU(SU_), Priority(Priority_) {}
43   };
44 
45   SpecificBumpPtrAllocator<Candidate> Alloc;
46   using Queue = simple_ilist<Candidate>;
47   Queue RQ; // Ready queue
48 
49   std::vector<unsigned> NumPreds;
50 
51   bool isScheduled(const SUnit *SU) const {
52     assert(!SU->isBoundaryNode());
53     return NumPreds[SU->NodeNum] == std::numeric_limits<unsigned>::max();
54   }
55 
56   void setIsScheduled(const SUnit *SU)  {
57     assert(!SU->isBoundaryNode());
58     NumPreds[SU->NodeNum] = std::numeric_limits<unsigned>::max();
59   }
60 
61   unsigned getNumPreds(const SUnit *SU) const {
62     assert(!SU->isBoundaryNode());
63     assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max());
64     return NumPreds[SU->NodeNum];
65   }
66 
67   unsigned decNumPreds(const SUnit *SU) {
68     assert(!SU->isBoundaryNode());
69     assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max());
70     return --NumPreds[SU->NodeNum];
71   }
72 
73   void initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits);
74 
75   int getReadySuccessors(const SUnit *SU) const;
76   int getNotReadySuccessors(const SUnit *SU) const;
77 
78   template <typename Calc>
79   unsigned findMax(unsigned Num, Calc C);
80 
81   Candidate* pickCandidate();
82 
83   void bumpPredsPriority(const SUnit *SchedSU, int Priority);
84   void releaseSuccessors(const SUnit* SU, int Priority);
85 
86 public:
87   std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
88                                      const ScheduleDAG &DAG);
89 };
90 
91 } // end anonymous namespace
92 
93 void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) {
94   NumPreds.resize(SUnits.size());
95   for (unsigned I = 0; I < SUnits.size(); ++I)
96     NumPreds[I] = SUnits[I].NumPredsLeft;
97 }
98 
99 int GCNMinRegScheduler::getReadySuccessors(const SUnit *SU) const {
100   unsigned NumSchedSuccs = 0;
101   for (auto SDep : SU->Succs) {
102     bool wouldBeScheduled = true;
103     for (auto PDep : SDep.getSUnit()->Preds) {
104       auto PSU = PDep.getSUnit();
105       assert(!PSU->isBoundaryNode());
106       if (PSU != SU && !isScheduled(PSU)) {
107         wouldBeScheduled = false;
108         break;
109       }
110     }
111     NumSchedSuccs += wouldBeScheduled ? 1 : 0;
112   }
113   return NumSchedSuccs;
114 }
115 
116 int GCNMinRegScheduler::getNotReadySuccessors(const SUnit *SU) const {
117   return SU->Succs.size() - getReadySuccessors(SU);
118 }
119 
120 template <typename Calc>
121 unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) {
122   assert(!RQ.empty() && Num <= RQ.size());
123 
124   using T = decltype(C(*RQ.begin())) ;
125 
126   T Max = std::numeric_limits<T>::min();
127   unsigned NumMax = 0;
128   for (auto I = RQ.begin(); Num; --Num) {
129     T Cur = C(*I);
130     if (Cur >= Max) {
131       if (Cur > Max) {
132         Max = Cur;
133         NumMax = 1;
134       } else
135         ++NumMax;
136       auto &Cand = *I++;
137       RQ.remove(Cand);
138       RQ.push_front(Cand);
139       continue;
140     }
141     ++I;
142   }
143   return NumMax;
144 }
145 
146 GCNMinRegScheduler::Candidate* GCNMinRegScheduler::pickCandidate() {
147   do {
148     unsigned Num = RQ.size();
149     if (Num == 1) break;
150 
151     LLVM_DEBUG(dbgs() << "\nSelecting max priority candidates among " << Num
152                       << '\n');
153     Num = findMax(Num, [=](const Candidate &C) { return C.Priority; });
154     if (Num == 1) break;
155 
156     LLVM_DEBUG(dbgs() << "\nSelecting min non-ready producing candidate among "
157                       << Num << '\n');
158     Num = findMax(Num, [=](const Candidate &C) {
159       auto SU = C.SU;
160       int Res = getNotReadySuccessors(SU);
161       LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would left non-ready "
162                         << Res << " successors, metric = " << -Res << '\n');
163       return -Res;
164     });
165     if (Num == 1) break;
166 
167     LLVM_DEBUG(dbgs() << "\nSelecting most producing candidate among " << Num
168                       << '\n');
169     Num = findMax(Num, [=](const Candidate &C) {
170       auto SU = C.SU;
171       auto Res = getReadySuccessors(SU);
172       LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would make ready " << Res
173                         << " successors, metric = " << Res << '\n');
174       return Res;
175     });
176     if (Num == 1) break;
177 
178     Num = Num ? Num : RQ.size();
179     LLVM_DEBUG(
180         dbgs()
181         << "\nCan't find best candidate, selecting in program order among "
182         << Num << '\n');
183     Num = findMax(Num, [=](const Candidate &C) { return -(int64_t)C.SU->NodeNum; });
184     assert(Num == 1);
185   } while (false);
186 
187   return &RQ.front();
188 }
189 
190 void GCNMinRegScheduler::bumpPredsPriority(const SUnit *SchedSU, int Priority) {
191   SmallPtrSet<const SUnit*, 32> Set;
192   for (const auto &S : SchedSU->Succs) {
193     if (S.getSUnit()->isBoundaryNode() || isScheduled(S.getSUnit()) ||
194         S.getKind() != SDep::Data)
195       continue;
196     for (const auto &P : S.getSUnit()->Preds) {
197       auto PSU = P.getSUnit();
198       assert(!PSU->isBoundaryNode());
199       if (PSU != SchedSU && !isScheduled(PSU)) {
200         Set.insert(PSU);
201       }
202     }
203   }
204   SmallVector<const SUnit*, 32> Worklist(Set.begin(), Set.end());
205   while (!Worklist.empty()) {
206     auto SU = Worklist.pop_back_val();
207     assert(!SU->isBoundaryNode());
208     for (const auto &P : SU->Preds) {
209       if (!P.getSUnit()->isBoundaryNode() && !isScheduled(P.getSUnit()) &&
210           Set.insert(P.getSUnit()).second)
211         Worklist.push_back(P.getSUnit());
212     }
213   }
214   LLVM_DEBUG(dbgs() << "Make the predecessors of SU(" << SchedSU->NodeNum
215                     << ")'s non-ready successors of " << Priority
216                     << " priority in ready queue: ");
217   for (auto &C : RQ) {
218     if (Set.count(C.SU)) {
219       C.Priority = Priority;
220       LLVM_DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')');
221     }
222   }
223   LLVM_DEBUG(dbgs() << '\n');
224 }
225 
226 void GCNMinRegScheduler::releaseSuccessors(const SUnit* SU, int Priority) {
227   for (const auto &S : SU->Succs) {
228     auto SuccSU = S.getSUnit();
229     if (S.isWeak())
230       continue;
231     assert(SuccSU->isBoundaryNode() || getNumPreds(SuccSU) > 0);
232     if (!SuccSU->isBoundaryNode() && decNumPreds(SuccSU) == 0)
233       RQ.push_front(*new (Alloc.Allocate()) Candidate(SuccSU, Priority));
234   }
235 }
236 
237 std::vector<const SUnit*>
238 GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots,
239                              const ScheduleDAG &DAG) {
240   const auto &SUnits = DAG.SUnits;
241   std::vector<const SUnit*> Schedule;
242   Schedule.reserve(SUnits.size());
243 
244   initNumPreds(SUnits);
245 
246   int StepNo = 0;
247 
248   for (auto SU : TopRoots) {
249     RQ.push_back(*new (Alloc.Allocate()) Candidate(SU, StepNo));
250   }
251   releaseSuccessors(&DAG.EntrySU, StepNo);
252 
253   while (!RQ.empty()) {
254     LLVM_DEBUG(dbgs() << "\n=== Picking candidate, Step = " << StepNo
255                       << "\n"
256                          "Ready queue:";
257                for (auto &C
258                     : RQ) dbgs()
259                << ' ' << C.SU->NodeNum << "(P" << C.Priority << ')';
260                dbgs() << '\n';);
261 
262     auto C = pickCandidate();
263     assert(C);
264     RQ.remove(*C);
265     auto SU = C->SU;
266     LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
267 
268     releaseSuccessors(SU, StepNo);
269     Schedule.push_back(SU);
270     setIsScheduled(SU);
271 
272     if (getReadySuccessors(SU) == 0)
273       bumpPredsPriority(SU, StepNo);
274 
275     ++StepNo;
276   }
277   assert(SUnits.size() == Schedule.size());
278 
279   return Schedule;
280 }
281 
282 namespace llvm {
283 
284 std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots,
285                                              const ScheduleDAG &DAG) {
286   GCNMinRegScheduler S;
287   return S.schedule(TopRoots, DAG);
288 }
289 
290 } // end namespace llvm
291