xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp (revision 47e073941f4e7ca6e9bde3fa65abbfcfed6bfa2b)
1  //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file implements the ResourcePriorityQueue class, which is a
10  // SchedulingPriorityQueue that prioritizes instructions using DFA state to
11  // reduce the length of the critical path through the basic block
12  // on VLIW platforms.
13  // The scheduler is basically a top-down adaptable list scheduler with DFA
14  // resource tracking added to the cost function.
15  // DFA is queried as a state machine to model "packets/bundles" during
16  // schedule. Currently packets/bundles are discarded at the end of
17  // scheduling, affecting only order of instructions.
18  //
19  //===----------------------------------------------------------------------===//
20  
21  #include "llvm/CodeGen/ResourcePriorityQueue.h"
22  #include "llvm/CodeGen/DFAPacketizer.h"
23  #include "llvm/CodeGen/SelectionDAGISel.h"
24  #include "llvm/CodeGen/SelectionDAGNodes.h"
25  #include "llvm/CodeGen/TargetInstrInfo.h"
26  #include "llvm/CodeGen/TargetLowering.h"
27  #include "llvm/CodeGen/TargetRegisterInfo.h"
28  #include "llvm/CodeGen/TargetSubtargetInfo.h"
29  #include "llvm/Support/CommandLine.h"
30  
31  using namespace llvm;
32  
33  #define DEBUG_TYPE "scheduler"
34  
35  static cl::opt<bool>
36      DisableDFASched("disable-dfa-sched", cl::Hidden,
37                      cl::desc("Disable use of DFA during scheduling"));
38  
39  static cl::opt<int> RegPressureThreshold(
40      "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::init(5),
41      cl::desc("Track reg pressure and switch priority to in-depth"));
42  
43  ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
44      : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
45    const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
46    TRI = STI.getRegisterInfo();
47    TLI = IS->TLI;
48    TII = STI.getInstrInfo();
49    ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
50    // This hard requirement could be relaxed, but for now
51    // do not let it proceed.
52    assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
53  
54    unsigned NumRC = TRI->getNumRegClasses();
55    RegLimit.resize(NumRC);
56    RegPressure.resize(NumRC);
57    std::fill(RegLimit.begin(), RegLimit.end(), 0);
58    std::fill(RegPressure.begin(), RegPressure.end(), 0);
59    for (const TargetRegisterClass *RC : TRI->regclasses())
60      RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
61  
62    ParallelLiveRanges = 0;
63    HorizontalVerticalBalance = 0;
64  }
65  
66  unsigned
67  ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
68    unsigned NumberDeps = 0;
69    for (SDep &Pred : SU->Preds) {
70      if (Pred.isCtrl())
71        continue;
72  
73      SUnit *PredSU = Pred.getSUnit();
74      const SDNode *ScegN = PredSU->getNode();
75  
76      if (!ScegN)
77        continue;
78  
79      // If value is passed to CopyToReg, it is probably
80      // live outside BB.
81      switch (ScegN->getOpcode()) {
82        default:  break;
83        case ISD::TokenFactor:    break;
84        case ISD::CopyFromReg:    NumberDeps++;  break;
85        case ISD::CopyToReg:      break;
86        case ISD::INLINEASM:      break;
87        case ISD::INLINEASM_BR:   break;
88      }
89      if (!ScegN->isMachineOpcode())
90        continue;
91  
92      for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
93        MVT VT = ScegN->getSimpleValueType(i);
94        if (TLI->isTypeLegal(VT)
95            && (TLI->getRegClassFor(VT)->getID() == RCId)) {
96          NumberDeps++;
97          break;
98        }
99      }
100    }
101    return NumberDeps;
102  }
103  
104  unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
105                                                      unsigned RCId) {
106    unsigned NumberDeps = 0;
107    for (const SDep &Succ : SU->Succs) {
108      if (Succ.isCtrl())
109        continue;
110  
111      SUnit *SuccSU = Succ.getSUnit();
112      const SDNode *ScegN = SuccSU->getNode();
113      if (!ScegN)
114        continue;
115  
116      // If value is passed to CopyToReg, it is probably
117      // live outside BB.
118      switch (ScegN->getOpcode()) {
119        default:  break;
120        case ISD::TokenFactor:    break;
121        case ISD::CopyFromReg:    break;
122        case ISD::CopyToReg:      NumberDeps++;  break;
123        case ISD::INLINEASM:      break;
124        case ISD::INLINEASM_BR:   break;
125      }
126      if (!ScegN->isMachineOpcode())
127        continue;
128  
129      for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
130        const SDValue &Op = ScegN->getOperand(i);
131        MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
132        if (TLI->isTypeLegal(VT)
133            && (TLI->getRegClassFor(VT)->getID() == RCId)) {
134          NumberDeps++;
135          break;
136        }
137      }
138    }
139    return NumberDeps;
140  }
141  
142  static unsigned numberCtrlDepsInSU(SUnit *SU) {
143    unsigned NumberDeps = 0;
144    for (const SDep &Succ : SU->Succs)
145      if (Succ.isCtrl())
146        NumberDeps++;
147  
148    return NumberDeps;
149  }
150  
151  static unsigned numberCtrlPredInSU(SUnit *SU) {
152    unsigned NumberDeps = 0;
153    for (SDep &Pred : SU->Preds)
154      if (Pred.isCtrl())
155        NumberDeps++;
156  
157    return NumberDeps;
158  }
159  
160  ///
161  /// Initialize nodes.
162  ///
163  void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
164    SUnits = &sunits;
165    NumNodesSolelyBlocking.resize(SUnits->size(), 0);
166  
167    for (SUnit &SU : *SUnits) {
168      initNumRegDefsLeft(&SU);
169      SU.NodeQueueId = 0;
170    }
171  }
172  
173  /// This heuristic is used if DFA scheduling is not desired
174  /// for some VLIW platform.
175  bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
176    // The isScheduleHigh flag allows nodes with wraparound dependencies that
177    // cannot easily be modeled as edges with latencies to be scheduled as
178    // soon as possible in a top-down schedule.
179    if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
180      return false;
181  
182    if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
183      return true;
184  
185    unsigned LHSNum = LHS->NodeNum;
186    unsigned RHSNum = RHS->NodeNum;
187  
188    // The most important heuristic is scheduling the critical path.
189    unsigned LHSLatency = PQ->getLatency(LHSNum);
190    unsigned RHSLatency = PQ->getLatency(RHSNum);
191    if (LHSLatency < RHSLatency) return true;
192    if (LHSLatency > RHSLatency) return false;
193  
194    // After that, if two nodes have identical latencies, look to see if one will
195    // unblock more other nodes than the other.
196    unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
197    unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
198    if (LHSBlocked < RHSBlocked) return true;
199    if (LHSBlocked > RHSBlocked) return false;
200  
201    // Finally, just to provide a stable ordering, use the node number as a
202    // deciding factor.
203    return LHSNum < RHSNum;
204  }
205  
206  
207  /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
208  /// of SU, return it, otherwise return null.
209  SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
210    SUnit *OnlyAvailablePred = nullptr;
211    for (const SDep &Pred : SU->Preds) {
212      SUnit &PredSU = *Pred.getSUnit();
213      if (!PredSU.isScheduled) {
214        // We found an available, but not scheduled, predecessor.  If it's the
215        // only one we have found, keep track of it... otherwise give up.
216        if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
217          return nullptr;
218        OnlyAvailablePred = &PredSU;
219      }
220    }
221    return OnlyAvailablePred;
222  }
223  
224  void ResourcePriorityQueue::push(SUnit *SU) {
225    // Look at all of the successors of this node.  Count the number of nodes that
226    // this node is the sole unscheduled node for.
227    unsigned NumNodesBlocking = 0;
228    for (const SDep &Succ : SU->Succs)
229      if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
230        ++NumNodesBlocking;
231  
232    NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
233    Queue.push_back(SU);
234  }
235  
236  /// Check if scheduling of this SU is possible
237  /// in the current packet.
238  bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
239    if (!SU || !SU->getNode())
240      return false;
241  
242    // If this is a compound instruction,
243    // it is likely to be a call. Do not delay it.
244    if (SU->getNode()->getGluedNode())
245      return true;
246  
247    // First see if the pipeline could receive this instruction
248    // in the current cycle.
249    if (SU->getNode()->isMachineOpcode())
250      switch (SU->getNode()->getMachineOpcode()) {
251      default:
252        if (!ResourcesModel->canReserveResources(&TII->get(
253            SU->getNode()->getMachineOpcode())))
254             return false;
255        break;
256      case TargetOpcode::EXTRACT_SUBREG:
257      case TargetOpcode::INSERT_SUBREG:
258      case TargetOpcode::SUBREG_TO_REG:
259      case TargetOpcode::REG_SEQUENCE:
260      case TargetOpcode::IMPLICIT_DEF:
261          break;
262      }
263  
264    // Now see if there are no other dependencies
265    // to instructions already in the packet.
266    for (const SUnit *S : Packet)
267      for (const SDep &Succ : S->Succs) {
268        // Since we do not add pseudos to packets, might as well
269        // ignore order deps.
270        if (Succ.isCtrl())
271          continue;
272  
273        if (Succ.getSUnit() == SU)
274          return false;
275      }
276  
277    return true;
278  }
279  
280  /// Keep track of available resources.
281  void ResourcePriorityQueue::reserveResources(SUnit *SU) {
282    // If this SU does not fit in the packet
283    // start a new one.
284    if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
285      ResourcesModel->clearResources();
286      Packet.clear();
287    }
288  
289    if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
290      switch (SU->getNode()->getMachineOpcode()) {
291      default:
292        ResourcesModel->reserveResources(&TII->get(
293          SU->getNode()->getMachineOpcode()));
294        break;
295      case TargetOpcode::EXTRACT_SUBREG:
296      case TargetOpcode::INSERT_SUBREG:
297      case TargetOpcode::SUBREG_TO_REG:
298      case TargetOpcode::REG_SEQUENCE:
299      case TargetOpcode::IMPLICIT_DEF:
300        break;
301      }
302      Packet.push_back(SU);
303    }
304    // Forcefully end packet for PseudoOps.
305    else {
306      ResourcesModel->clearResources();
307      Packet.clear();
308    }
309  
310    // If packet is now full, reset the state so in the next cycle
311    // we start fresh.
312    if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
313      ResourcesModel->clearResources();
314      Packet.clear();
315    }
316  }
317  
318  int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
319    int RegBalance = 0;
320  
321    if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
322      return RegBalance;
323  
324    // Gen estimate.
325    for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
326        MVT VT = SU->getNode()->getSimpleValueType(i);
327        if (TLI->isTypeLegal(VT)
328            && TLI->getRegClassFor(VT)
329            && TLI->getRegClassFor(VT)->getID() == RCId)
330          RegBalance += numberRCValSuccInSU(SU, RCId);
331    }
332    // Kill estimate.
333    for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
334        const SDValue &Op = SU->getNode()->getOperand(i);
335        MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
336        if (isa<ConstantSDNode>(Op.getNode()))
337          continue;
338  
339        if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
340            && TLI->getRegClassFor(VT)->getID() == RCId)
341          RegBalance -= numberRCValPredInSU(SU, RCId);
342    }
343    return RegBalance;
344  }
345  
346  /// Estimates change in reg pressure from this SU.
347  /// It is achieved by trivial tracking of defined
348  /// and used vregs in dependent instructions.
349  /// The RawPressure flag makes this function to ignore
350  /// existing reg file sizes, and report raw def/use
351  /// balance.
352  int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
353    int RegBalance = 0;
354  
355    if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
356      return RegBalance;
357  
358    if (RawPressure) {
359      for (const TargetRegisterClass *RC : TRI->regclasses())
360        RegBalance += rawRegPressureDelta(SU, RC->getID());
361    }
362    else {
363      for (const TargetRegisterClass *RC : TRI->regclasses()) {
364        if ((RegPressure[RC->getID()] +
365             rawRegPressureDelta(SU, RC->getID()) > 0) &&
366            (RegPressure[RC->getID()] +
367             rawRegPressureDelta(SU, RC->getID())  >= RegLimit[RC->getID()]))
368          RegBalance += rawRegPressureDelta(SU, RC->getID());
369      }
370    }
371  
372    return RegBalance;
373  }
374  
375  // Constants used to denote relative importance of
376  // heuristic components for cost computation.
377  static const unsigned PriorityOne = 200;
378  static const unsigned PriorityTwo = 50;
379  static const unsigned PriorityThree = 15;
380  static const unsigned PriorityFour = 5;
381  static const unsigned ScaleOne = 20;
382  static const unsigned ScaleTwo = 10;
383  static const unsigned ScaleThree = 5;
384  static const unsigned FactorOne = 2;
385  
386  /// Returns single number reflecting benefit of scheduling SU
387  /// in the current cycle.
388  int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
389    // Initial trivial priority.
390    int ResCount = 1;
391  
392    // Do not waste time on a node that is already scheduled.
393    if (SU->isScheduled)
394      return ResCount;
395  
396    // Forced priority is high.
397    if (SU->isScheduleHigh)
398      ResCount += PriorityOne;
399  
400    // Adaptable scheduling
401    // A small, but very parallel
402    // region, where reg pressure is an issue.
403    if (HorizontalVerticalBalance > RegPressureThreshold) {
404      // Critical path first
405      ResCount += (SU->getHeight() * ScaleTwo);
406      // If resources are available for it, multiply the
407      // chance of scheduling.
408      if (isResourceAvailable(SU))
409        ResCount <<= FactorOne;
410  
411      // Consider change to reg pressure from scheduling
412      // this SU.
413      ResCount -= (regPressureDelta(SU,true) * ScaleOne);
414    }
415    // Default heuristic, greeady and
416    // critical path driven.
417    else {
418      // Critical path first.
419      ResCount += (SU->getHeight() * ScaleTwo);
420      // Now see how many instructions is blocked by this SU.
421      ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
422      // If resources are available for it, multiply the
423      // chance of scheduling.
424      if (isResourceAvailable(SU))
425        ResCount <<= FactorOne;
426  
427      ResCount -= (regPressureDelta(SU) * ScaleTwo);
428    }
429  
430    // These are platform-specific things.
431    // Will need to go into the back end
432    // and accessed from here via a hook.
433    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
434      if (N->isMachineOpcode()) {
435        const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
436        if (TID.isCall())
437          ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
438      }
439      else
440        switch (N->getOpcode()) {
441        default:  break;
442        case ISD::TokenFactor:
443        case ISD::CopyFromReg:
444        case ISD::CopyToReg:
445          ResCount += PriorityFour;
446          break;
447  
448        case ISD::INLINEASM:
449        case ISD::INLINEASM_BR:
450          ResCount += PriorityThree;
451          break;
452        }
453    }
454    return ResCount;
455  }
456  
457  
458  /// Main resource tracking point.
459  void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
460    // Use NULL entry as an event marker to reset
461    // the DFA state.
462    if (!SU) {
463      ResourcesModel->clearResources();
464      Packet.clear();
465      return;
466    }
467  
468    const SDNode *ScegN = SU->getNode();
469    // Update reg pressure tracking.
470    // First update current node.
471    if (ScegN->isMachineOpcode()) {
472      // Estimate generated regs.
473      for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
474        MVT VT = ScegN->getSimpleValueType(i);
475  
476        if (TLI->isTypeLegal(VT)) {
477          const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
478          if (RC)
479            RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
480        }
481      }
482      // Estimate killed regs.
483      for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
484        const SDValue &Op = ScegN->getOperand(i);
485        MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
486  
487        if (TLI->isTypeLegal(VT)) {
488          const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
489          if (RC) {
490            if (RegPressure[RC->getID()] >
491              (numberRCValPredInSU(SU, RC->getID())))
492              RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
493            else RegPressure[RC->getID()] = 0;
494          }
495        }
496      }
497      for (SDep &Pred : SU->Preds) {
498        if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
499          continue;
500        --Pred.getSUnit()->NumRegDefsLeft;
501      }
502    }
503  
504    // Reserve resources for this SU.
505    reserveResources(SU);
506  
507    // Adjust number of parallel live ranges.
508    // Heuristic is simple - node with no data successors reduces
509    // number of live ranges. All others, increase it.
510    unsigned NumberNonControlDeps = 0;
511  
512    for (const SDep &Succ : SU->Succs) {
513      adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
514      if (!Succ.isCtrl())
515        NumberNonControlDeps++;
516    }
517  
518    if (!NumberNonControlDeps) {
519      if (ParallelLiveRanges >= SU->NumPreds)
520        ParallelLiveRanges -= SU->NumPreds;
521      else
522        ParallelLiveRanges = 0;
523  
524    }
525    else
526      ParallelLiveRanges += SU->NumRegDefsLeft;
527  
528    // Track parallel live chains.
529    HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
530    HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
531  }
532  
533  void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
534    unsigned  NodeNumDefs = 0;
535    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
536      if (N->isMachineOpcode()) {
537        const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
538        // No register need be allocated for this.
539        if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
540          NodeNumDefs = 0;
541          break;
542        }
543        NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
544      }
545      else
546        switch(N->getOpcode()) {
547          default:     break;
548          case ISD::CopyFromReg:
549            NodeNumDefs++;
550            break;
551          case ISD::INLINEASM:
552          case ISD::INLINEASM_BR:
553            NodeNumDefs++;
554            break;
555        }
556  
557    SU->NumRegDefsLeft = NodeNumDefs;
558  }
559  
560  /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
561  /// scheduled.  If SU is not itself available, then there is at least one
562  /// predecessor node that has not been scheduled yet.  If SU has exactly ONE
563  /// unscheduled predecessor, we want to increase its priority: it getting
564  /// scheduled will make this node available, so it is better than some other
565  /// node of the same priority that will not make a node available.
566  void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
567    if (SU->isAvailable) return;  // All preds scheduled.
568  
569    SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
570    if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
571      return;
572  
573    // Okay, we found a single predecessor that is available, but not scheduled.
574    // Since it is available, it must be in the priority queue.  First remove it.
575    remove(OnlyAvailablePred);
576  
577    // Reinsert the node into the priority queue, which recomputes its
578    // NumNodesSolelyBlocking value.
579    push(OnlyAvailablePred);
580  }
581  
582  
583  /// Main access point - returns next instructions
584  /// to be placed in scheduling sequence.
585  SUnit *ResourcePriorityQueue::pop() {
586    if (empty())
587      return nullptr;
588  
589    std::vector<SUnit *>::iterator Best = Queue.begin();
590    if (!DisableDFASched) {
591      int BestCost = SUSchedulingCost(*Best);
592      for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
593  
594        if (SUSchedulingCost(*I) > BestCost) {
595          BestCost = SUSchedulingCost(*I);
596          Best = I;
597        }
598      }
599    }
600    // Use default TD scheduling mechanism.
601    else {
602      for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
603        if (Picker(*Best, *I))
604          Best = I;
605    }
606  
607    SUnit *V = *Best;
608    if (Best != std::prev(Queue.end()))
609      std::swap(*Best, Queue.back());
610  
611    Queue.pop_back();
612  
613    return V;
614  }
615  
616  
617  void ResourcePriorityQueue::remove(SUnit *SU) {
618    assert(!Queue.empty() && "Queue is empty!");
619    std::vector<SUnit *>::iterator I = find(Queue, SU);
620    if (I != std::prev(Queue.end()))
621      std::swap(*I, Queue.back());
622  
623    Queue.pop_back();
624  }
625