xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp (revision 6f63e88c0166ed3e5f2805a9e667c7d24d304cf1)
1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ResourcePriorityQueue class, which is a
10 // SchedulingPriorityQueue that prioritizes instructions using DFA state to
11 // reduce the length of the critical path through the basic block
12 // on VLIW platforms.
13 // The scheduler is basically a top-down adaptable list scheduler with DFA
14 // resource tracking added to the cost function.
15 // DFA is queried as a state machine to model "packets/bundles" during
16 // schedule. Currently packets/bundles are discarded at the end of
17 // scheduling, affecting only order of instructions.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #include "llvm/CodeGen/ResourcePriorityQueue.h"
22 #include "llvm/CodeGen/MachineInstr.h"
23 #include "llvm/CodeGen/SelectionDAGNodes.h"
24 #include "llvm/CodeGen/TargetLowering.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Target/TargetMachine.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "scheduler"
34 
35 static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
36   cl::ZeroOrMore, cl::init(false),
37   cl::desc("Disable use of DFA during scheduling"));
38 
39 static cl::opt<int> RegPressureThreshold(
40   "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
41   cl::desc("Track reg pressure and switch priority to in-depth"));
42 
43 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
44     : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
45   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
46   TRI = STI.getRegisterInfo();
47   TLI = IS->TLI;
48   TII = STI.getInstrInfo();
49   ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
50   // This hard requirement could be relaxed, but for now
51   // do not let it proceed.
52   assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
53 
54   unsigned NumRC = TRI->getNumRegClasses();
55   RegLimit.resize(NumRC);
56   RegPressure.resize(NumRC);
57   std::fill(RegLimit.begin(), RegLimit.end(), 0);
58   std::fill(RegPressure.begin(), RegPressure.end(), 0);
59   for (const TargetRegisterClass *RC : TRI->regclasses())
60     RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
61 
62   ParallelLiveRanges = 0;
63   HorizontalVerticalBalance = 0;
64 }
65 
66 unsigned
67 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
68   unsigned NumberDeps = 0;
69   for (SDep &Pred : SU->Preds) {
70     if (Pred.isCtrl())
71       continue;
72 
73     SUnit *PredSU = Pred.getSUnit();
74     const SDNode *ScegN = PredSU->getNode();
75 
76     if (!ScegN)
77       continue;
78 
79     // If value is passed to CopyToReg, it is probably
80     // live outside BB.
81     switch (ScegN->getOpcode()) {
82       default:  break;
83       case ISD::TokenFactor:    break;
84       case ISD::CopyFromReg:    NumberDeps++;  break;
85       case ISD::CopyToReg:      break;
86       case ISD::INLINEASM:      break;
87       case ISD::INLINEASM_BR:   break;
88     }
89     if (!ScegN->isMachineOpcode())
90       continue;
91 
92     for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
93       MVT VT = ScegN->getSimpleValueType(i);
94       if (TLI->isTypeLegal(VT)
95           && (TLI->getRegClassFor(VT)->getID() == RCId)) {
96         NumberDeps++;
97         break;
98       }
99     }
100   }
101   return NumberDeps;
102 }
103 
104 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
105                                                     unsigned RCId) {
106   unsigned NumberDeps = 0;
107   for (const SDep &Succ : SU->Succs) {
108     if (Succ.isCtrl())
109       continue;
110 
111     SUnit *SuccSU = Succ.getSUnit();
112     const SDNode *ScegN = SuccSU->getNode();
113     if (!ScegN)
114       continue;
115 
116     // If value is passed to CopyToReg, it is probably
117     // live outside BB.
118     switch (ScegN->getOpcode()) {
119       default:  break;
120       case ISD::TokenFactor:    break;
121       case ISD::CopyFromReg:    break;
122       case ISD::CopyToReg:      NumberDeps++;  break;
123       case ISD::INLINEASM:      break;
124       case ISD::INLINEASM_BR:   break;
125     }
126     if (!ScegN->isMachineOpcode())
127       continue;
128 
129     for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
130       const SDValue &Op = ScegN->getOperand(i);
131       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
132       if (TLI->isTypeLegal(VT)
133           && (TLI->getRegClassFor(VT)->getID() == RCId)) {
134         NumberDeps++;
135         break;
136       }
137     }
138   }
139   return NumberDeps;
140 }
141 
142 static unsigned numberCtrlDepsInSU(SUnit *SU) {
143   unsigned NumberDeps = 0;
144   for (const SDep &Succ : SU->Succs)
145     if (Succ.isCtrl())
146       NumberDeps++;
147 
148   return NumberDeps;
149 }
150 
151 static unsigned numberCtrlPredInSU(SUnit *SU) {
152   unsigned NumberDeps = 0;
153   for (SDep &Pred : SU->Preds)
154     if (Pred.isCtrl())
155       NumberDeps++;
156 
157   return NumberDeps;
158 }
159 
160 ///
161 /// Initialize nodes.
162 ///
163 void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
164   SUnits = &sunits;
165   NumNodesSolelyBlocking.resize(SUnits->size(), 0);
166 
167   for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
168     SUnit *SU = &(*SUnits)[i];
169     initNumRegDefsLeft(SU);
170     SU->NodeQueueId = 0;
171   }
172 }
173 
174 /// This heuristic is used if DFA scheduling is not desired
175 /// for some VLIW platform.
176 bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
177   // The isScheduleHigh flag allows nodes with wraparound dependencies that
178   // cannot easily be modeled as edges with latencies to be scheduled as
179   // soon as possible in a top-down schedule.
180   if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
181     return false;
182 
183   if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
184     return true;
185 
186   unsigned LHSNum = LHS->NodeNum;
187   unsigned RHSNum = RHS->NodeNum;
188 
189   // The most important heuristic is scheduling the critical path.
190   unsigned LHSLatency = PQ->getLatency(LHSNum);
191   unsigned RHSLatency = PQ->getLatency(RHSNum);
192   if (LHSLatency < RHSLatency) return true;
193   if (LHSLatency > RHSLatency) return false;
194 
195   // After that, if two nodes have identical latencies, look to see if one will
196   // unblock more other nodes than the other.
197   unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
198   unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
199   if (LHSBlocked < RHSBlocked) return true;
200   if (LHSBlocked > RHSBlocked) return false;
201 
202   // Finally, just to provide a stable ordering, use the node number as a
203   // deciding factor.
204   return LHSNum < RHSNum;
205 }
206 
207 
208 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
209 /// of SU, return it, otherwise return null.
210 SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
211   SUnit *OnlyAvailablePred = nullptr;
212   for (const SDep &Pred : SU->Preds) {
213     SUnit &PredSU = *Pred.getSUnit();
214     if (!PredSU.isScheduled) {
215       // We found an available, but not scheduled, predecessor.  If it's the
216       // only one we have found, keep track of it... otherwise give up.
217       if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
218         return nullptr;
219       OnlyAvailablePred = &PredSU;
220     }
221   }
222   return OnlyAvailablePred;
223 }
224 
225 void ResourcePriorityQueue::push(SUnit *SU) {
226   // Look at all of the successors of this node.  Count the number of nodes that
227   // this node is the sole unscheduled node for.
228   unsigned NumNodesBlocking = 0;
229   for (const SDep &Succ : SU->Succs)
230     if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
231       ++NumNodesBlocking;
232 
233   NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
234   Queue.push_back(SU);
235 }
236 
237 /// Check if scheduling of this SU is possible
238 /// in the current packet.
239 bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
240   if (!SU || !SU->getNode())
241     return false;
242 
243   // If this is a compound instruction,
244   // it is likely to be a call. Do not delay it.
245   if (SU->getNode()->getGluedNode())
246     return true;
247 
248   // First see if the pipeline could receive this instruction
249   // in the current cycle.
250   if (SU->getNode()->isMachineOpcode())
251     switch (SU->getNode()->getMachineOpcode()) {
252     default:
253       if (!ResourcesModel->canReserveResources(&TII->get(
254           SU->getNode()->getMachineOpcode())))
255            return false;
256       break;
257     case TargetOpcode::EXTRACT_SUBREG:
258     case TargetOpcode::INSERT_SUBREG:
259     case TargetOpcode::SUBREG_TO_REG:
260     case TargetOpcode::REG_SEQUENCE:
261     case TargetOpcode::IMPLICIT_DEF:
262         break;
263     }
264 
265   // Now see if there are no other dependencies
266   // to instructions already in the packet.
267   for (unsigned i = 0, e = Packet.size(); i != e; ++i)
268     for (const SDep &Succ : Packet[i]->Succs) {
269       // Since we do not add pseudos to packets, might as well
270       // ignore order deps.
271       if (Succ.isCtrl())
272         continue;
273 
274       if (Succ.getSUnit() == SU)
275         return false;
276     }
277 
278   return true;
279 }
280 
281 /// Keep track of available resources.
282 void ResourcePriorityQueue::reserveResources(SUnit *SU) {
283   // If this SU does not fit in the packet
284   // start a new one.
285   if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
286     ResourcesModel->clearResources();
287     Packet.clear();
288   }
289 
290   if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
291     switch (SU->getNode()->getMachineOpcode()) {
292     default:
293       ResourcesModel->reserveResources(&TII->get(
294         SU->getNode()->getMachineOpcode()));
295       break;
296     case TargetOpcode::EXTRACT_SUBREG:
297     case TargetOpcode::INSERT_SUBREG:
298     case TargetOpcode::SUBREG_TO_REG:
299     case TargetOpcode::REG_SEQUENCE:
300     case TargetOpcode::IMPLICIT_DEF:
301       break;
302     }
303     Packet.push_back(SU);
304   }
305   // Forcefully end packet for PseudoOps.
306   else {
307     ResourcesModel->clearResources();
308     Packet.clear();
309   }
310 
311   // If packet is now full, reset the state so in the next cycle
312   // we start fresh.
313   if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
314     ResourcesModel->clearResources();
315     Packet.clear();
316   }
317 }
318 
319 int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
320   int RegBalance = 0;
321 
322   if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
323     return RegBalance;
324 
325   // Gen estimate.
326   for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
327       MVT VT = SU->getNode()->getSimpleValueType(i);
328       if (TLI->isTypeLegal(VT)
329           && TLI->getRegClassFor(VT)
330           && TLI->getRegClassFor(VT)->getID() == RCId)
331         RegBalance += numberRCValSuccInSU(SU, RCId);
332   }
333   // Kill estimate.
334   for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
335       const SDValue &Op = SU->getNode()->getOperand(i);
336       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
337       if (isa<ConstantSDNode>(Op.getNode()))
338         continue;
339 
340       if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
341           && TLI->getRegClassFor(VT)->getID() == RCId)
342         RegBalance -= numberRCValPredInSU(SU, RCId);
343   }
344   return RegBalance;
345 }
346 
347 /// Estimates change in reg pressure from this SU.
348 /// It is achieved by trivial tracking of defined
349 /// and used vregs in dependent instructions.
350 /// The RawPressure flag makes this function to ignore
351 /// existing reg file sizes, and report raw def/use
352 /// balance.
353 int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
354   int RegBalance = 0;
355 
356   if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
357     return RegBalance;
358 
359   if (RawPressure) {
360     for (const TargetRegisterClass *RC : TRI->regclasses())
361       RegBalance += rawRegPressureDelta(SU, RC->getID());
362   }
363   else {
364     for (const TargetRegisterClass *RC : TRI->regclasses()) {
365       if ((RegPressure[RC->getID()] +
366            rawRegPressureDelta(SU, RC->getID()) > 0) &&
367           (RegPressure[RC->getID()] +
368            rawRegPressureDelta(SU, RC->getID())  >= RegLimit[RC->getID()]))
369         RegBalance += rawRegPressureDelta(SU, RC->getID());
370     }
371   }
372 
373   return RegBalance;
374 }
375 
376 // Constants used to denote relative importance of
377 // heuristic components for cost computation.
378 static const unsigned PriorityOne = 200;
379 static const unsigned PriorityTwo = 50;
380 static const unsigned PriorityThree = 15;
381 static const unsigned PriorityFour = 5;
382 static const unsigned ScaleOne = 20;
383 static const unsigned ScaleTwo = 10;
384 static const unsigned ScaleThree = 5;
385 static const unsigned FactorOne = 2;
386 
387 /// Returns single number reflecting benefit of scheduling SU
388 /// in the current cycle.
389 int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
390   // Initial trivial priority.
391   int ResCount = 1;
392 
393   // Do not waste time on a node that is already scheduled.
394   if (SU->isScheduled)
395     return ResCount;
396 
397   // Forced priority is high.
398   if (SU->isScheduleHigh)
399     ResCount += PriorityOne;
400 
401   // Adaptable scheduling
402   // A small, but very parallel
403   // region, where reg pressure is an issue.
404   if (HorizontalVerticalBalance > RegPressureThreshold) {
405     // Critical path first
406     ResCount += (SU->getHeight() * ScaleTwo);
407     // If resources are available for it, multiply the
408     // chance of scheduling.
409     if (isResourceAvailable(SU))
410       ResCount <<= FactorOne;
411 
412     // Consider change to reg pressure from scheduling
413     // this SU.
414     ResCount -= (regPressureDelta(SU,true) * ScaleOne);
415   }
416   // Default heuristic, greeady and
417   // critical path driven.
418   else {
419     // Critical path first.
420     ResCount += (SU->getHeight() * ScaleTwo);
421     // Now see how many instructions is blocked by this SU.
422     ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
423     // If resources are available for it, multiply the
424     // chance of scheduling.
425     if (isResourceAvailable(SU))
426       ResCount <<= FactorOne;
427 
428     ResCount -= (regPressureDelta(SU) * ScaleTwo);
429   }
430 
431   // These are platform-specific things.
432   // Will need to go into the back end
433   // and accessed from here via a hook.
434   for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
435     if (N->isMachineOpcode()) {
436       const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
437       if (TID.isCall())
438         ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
439     }
440     else
441       switch (N->getOpcode()) {
442       default:  break;
443       case ISD::TokenFactor:
444       case ISD::CopyFromReg:
445       case ISD::CopyToReg:
446         ResCount += PriorityFour;
447         break;
448 
449       case ISD::INLINEASM:
450       case ISD::INLINEASM_BR:
451         ResCount += PriorityThree;
452         break;
453       }
454   }
455   return ResCount;
456 }
457 
458 
459 /// Main resource tracking point.
460 void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
461   // Use NULL entry as an event marker to reset
462   // the DFA state.
463   if (!SU) {
464     ResourcesModel->clearResources();
465     Packet.clear();
466     return;
467   }
468 
469   const SDNode *ScegN = SU->getNode();
470   // Update reg pressure tracking.
471   // First update current node.
472   if (ScegN->isMachineOpcode()) {
473     // Estimate generated regs.
474     for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
475       MVT VT = ScegN->getSimpleValueType(i);
476 
477       if (TLI->isTypeLegal(VT)) {
478         const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
479         if (RC)
480           RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
481       }
482     }
483     // Estimate killed regs.
484     for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
485       const SDValue &Op = ScegN->getOperand(i);
486       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
487 
488       if (TLI->isTypeLegal(VT)) {
489         const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
490         if (RC) {
491           if (RegPressure[RC->getID()] >
492             (numberRCValPredInSU(SU, RC->getID())))
493             RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
494           else RegPressure[RC->getID()] = 0;
495         }
496       }
497     }
498     for (SDep &Pred : SU->Preds) {
499       if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
500         continue;
501       --Pred.getSUnit()->NumRegDefsLeft;
502     }
503   }
504 
505   // Reserve resources for this SU.
506   reserveResources(SU);
507 
508   // Adjust number of parallel live ranges.
509   // Heuristic is simple - node with no data successors reduces
510   // number of live ranges. All others, increase it.
511   unsigned NumberNonControlDeps = 0;
512 
513   for (const SDep &Succ : SU->Succs) {
514     adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
515     if (!Succ.isCtrl())
516       NumberNonControlDeps++;
517   }
518 
519   if (!NumberNonControlDeps) {
520     if (ParallelLiveRanges >= SU->NumPreds)
521       ParallelLiveRanges -= SU->NumPreds;
522     else
523       ParallelLiveRanges = 0;
524 
525   }
526   else
527     ParallelLiveRanges += SU->NumRegDefsLeft;
528 
529   // Track parallel live chains.
530   HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
531   HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
532 }
533 
534 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
535   unsigned  NodeNumDefs = 0;
536   for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
537     if (N->isMachineOpcode()) {
538       const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
539       // No register need be allocated for this.
540       if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
541         NodeNumDefs = 0;
542         break;
543       }
544       NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
545     }
546     else
547       switch(N->getOpcode()) {
548         default:     break;
549         case ISD::CopyFromReg:
550           NodeNumDefs++;
551           break;
552         case ISD::INLINEASM:
553         case ISD::INLINEASM_BR:
554           NodeNumDefs++;
555           break;
556       }
557 
558   SU->NumRegDefsLeft = NodeNumDefs;
559 }
560 
561 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
562 /// scheduled.  If SU is not itself available, then there is at least one
563 /// predecessor node that has not been scheduled yet.  If SU has exactly ONE
564 /// unscheduled predecessor, we want to increase its priority: it getting
565 /// scheduled will make this node available, so it is better than some other
566 /// node of the same priority that will not make a node available.
567 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
568   if (SU->isAvailable) return;  // All preds scheduled.
569 
570   SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
571   if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
572     return;
573 
574   // Okay, we found a single predecessor that is available, but not scheduled.
575   // Since it is available, it must be in the priority queue.  First remove it.
576   remove(OnlyAvailablePred);
577 
578   // Reinsert the node into the priority queue, which recomputes its
579   // NumNodesSolelyBlocking value.
580   push(OnlyAvailablePred);
581 }
582 
583 
584 /// Main access point - returns next instructions
585 /// to be placed in scheduling sequence.
586 SUnit *ResourcePriorityQueue::pop() {
587   if (empty())
588     return nullptr;
589 
590   std::vector<SUnit *>::iterator Best = Queue.begin();
591   if (!DisableDFASched) {
592     int BestCost = SUSchedulingCost(*Best);
593     for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
594 
595       if (SUSchedulingCost(*I) > BestCost) {
596         BestCost = SUSchedulingCost(*I);
597         Best = I;
598       }
599     }
600   }
601   // Use default TD scheduling mechanism.
602   else {
603     for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
604       if (Picker(*Best, *I))
605         Best = I;
606   }
607 
608   SUnit *V = *Best;
609   if (Best != std::prev(Queue.end()))
610     std::swap(*Best, Queue.back());
611 
612   Queue.pop_back();
613 
614   return V;
615 }
616 
617 
618 void ResourcePriorityQueue::remove(SUnit *SU) {
619   assert(!Queue.empty() && "Queue is empty!");
620   std::vector<SUnit *>::iterator I = find(Queue, SU);
621   if (I != std::prev(Queue.end()))
622     std::swap(*I, Queue.back());
623 
624   Queue.pop_back();
625 }
626