xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp (revision c66ec88fed842fbaad62c30d510644ceb7bd2d71)
1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ResourcePriorityQueue class, which is a
10 // SchedulingPriorityQueue that prioritizes instructions using DFA state to
11 // reduce the length of the critical path through the basic block
12 // on VLIW platforms.
13 // The scheduler is basically a top-down adaptable list scheduler with DFA
14 // resource tracking added to the cost function.
15 // DFA is queried as a state machine to model "packets/bundles" during
16 // schedule. Currently packets/bundles are discarded at the end of
17 // scheduling, affecting only order of instructions.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #include "llvm/CodeGen/ResourcePriorityQueue.h"
22 #include "llvm/CodeGen/DFAPacketizer.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/CodeGen/SelectionDAGNodes.h"
26 #include "llvm/CodeGen/TargetInstrInfo.h"
27 #include "llvm/CodeGen/TargetLowering.h"
28 #include "llvm/CodeGen/TargetRegisterInfo.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include "llvm/Target/TargetMachine.h"
34 
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "scheduler"
38 
39 static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
40   cl::ZeroOrMore, cl::init(false),
41   cl::desc("Disable use of DFA during scheduling"));
42 
43 static cl::opt<int> RegPressureThreshold(
44   "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
45   cl::desc("Track reg pressure and switch priority to in-depth"));
46 
47 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
48     : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
49   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
50   TRI = STI.getRegisterInfo();
51   TLI = IS->TLI;
52   TII = STI.getInstrInfo();
53   ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
54   // This hard requirement could be relaxed, but for now
55   // do not let it proceed.
56   assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
57 
58   unsigned NumRC = TRI->getNumRegClasses();
59   RegLimit.resize(NumRC);
60   RegPressure.resize(NumRC);
61   std::fill(RegLimit.begin(), RegLimit.end(), 0);
62   std::fill(RegPressure.begin(), RegPressure.end(), 0);
63   for (const TargetRegisterClass *RC : TRI->regclasses())
64     RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
65 
66   ParallelLiveRanges = 0;
67   HorizontalVerticalBalance = 0;
68 }
69 
70 unsigned
71 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
72   unsigned NumberDeps = 0;
73   for (SDep &Pred : SU->Preds) {
74     if (Pred.isCtrl())
75       continue;
76 
77     SUnit *PredSU = Pred.getSUnit();
78     const SDNode *ScegN = PredSU->getNode();
79 
80     if (!ScegN)
81       continue;
82 
83     // If value is passed to CopyToReg, it is probably
84     // live outside BB.
85     switch (ScegN->getOpcode()) {
86       default:  break;
87       case ISD::TokenFactor:    break;
88       case ISD::CopyFromReg:    NumberDeps++;  break;
89       case ISD::CopyToReg:      break;
90       case ISD::INLINEASM:      break;
91       case ISD::INLINEASM_BR:   break;
92     }
93     if (!ScegN->isMachineOpcode())
94       continue;
95 
96     for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
97       MVT VT = ScegN->getSimpleValueType(i);
98       if (TLI->isTypeLegal(VT)
99           && (TLI->getRegClassFor(VT)->getID() == RCId)) {
100         NumberDeps++;
101         break;
102       }
103     }
104   }
105   return NumberDeps;
106 }
107 
108 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
109                                                     unsigned RCId) {
110   unsigned NumberDeps = 0;
111   for (const SDep &Succ : SU->Succs) {
112     if (Succ.isCtrl())
113       continue;
114 
115     SUnit *SuccSU = Succ.getSUnit();
116     const SDNode *ScegN = SuccSU->getNode();
117     if (!ScegN)
118       continue;
119 
120     // If value is passed to CopyToReg, it is probably
121     // live outside BB.
122     switch (ScegN->getOpcode()) {
123       default:  break;
124       case ISD::TokenFactor:    break;
125       case ISD::CopyFromReg:    break;
126       case ISD::CopyToReg:      NumberDeps++;  break;
127       case ISD::INLINEASM:      break;
128       case ISD::INLINEASM_BR:   break;
129     }
130     if (!ScegN->isMachineOpcode())
131       continue;
132 
133     for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
134       const SDValue &Op = ScegN->getOperand(i);
135       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
136       if (TLI->isTypeLegal(VT)
137           && (TLI->getRegClassFor(VT)->getID() == RCId)) {
138         NumberDeps++;
139         break;
140       }
141     }
142   }
143   return NumberDeps;
144 }
145 
146 static unsigned numberCtrlDepsInSU(SUnit *SU) {
147   unsigned NumberDeps = 0;
148   for (const SDep &Succ : SU->Succs)
149     if (Succ.isCtrl())
150       NumberDeps++;
151 
152   return NumberDeps;
153 }
154 
155 static unsigned numberCtrlPredInSU(SUnit *SU) {
156   unsigned NumberDeps = 0;
157   for (SDep &Pred : SU->Preds)
158     if (Pred.isCtrl())
159       NumberDeps++;
160 
161   return NumberDeps;
162 }
163 
164 ///
165 /// Initialize nodes.
166 ///
167 void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
168   SUnits = &sunits;
169   NumNodesSolelyBlocking.resize(SUnits->size(), 0);
170 
171   for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
172     SUnit *SU = &(*SUnits)[i];
173     initNumRegDefsLeft(SU);
174     SU->NodeQueueId = 0;
175   }
176 }
177 
178 /// This heuristic is used if DFA scheduling is not desired
179 /// for some VLIW platform.
180 bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
181   // The isScheduleHigh flag allows nodes with wraparound dependencies that
182   // cannot easily be modeled as edges with latencies to be scheduled as
183   // soon as possible in a top-down schedule.
184   if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
185     return false;
186 
187   if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
188     return true;
189 
190   unsigned LHSNum = LHS->NodeNum;
191   unsigned RHSNum = RHS->NodeNum;
192 
193   // The most important heuristic is scheduling the critical path.
194   unsigned LHSLatency = PQ->getLatency(LHSNum);
195   unsigned RHSLatency = PQ->getLatency(RHSNum);
196   if (LHSLatency < RHSLatency) return true;
197   if (LHSLatency > RHSLatency) return false;
198 
199   // After that, if two nodes have identical latencies, look to see if one will
200   // unblock more other nodes than the other.
201   unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
202   unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
203   if (LHSBlocked < RHSBlocked) return true;
204   if (LHSBlocked > RHSBlocked) return false;
205 
206   // Finally, just to provide a stable ordering, use the node number as a
207   // deciding factor.
208   return LHSNum < RHSNum;
209 }
210 
211 
212 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
213 /// of SU, return it, otherwise return null.
214 SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
215   SUnit *OnlyAvailablePred = nullptr;
216   for (const SDep &Pred : SU->Preds) {
217     SUnit &PredSU = *Pred.getSUnit();
218     if (!PredSU.isScheduled) {
219       // We found an available, but not scheduled, predecessor.  If it's the
220       // only one we have found, keep track of it... otherwise give up.
221       if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
222         return nullptr;
223       OnlyAvailablePred = &PredSU;
224     }
225   }
226   return OnlyAvailablePred;
227 }
228 
229 void ResourcePriorityQueue::push(SUnit *SU) {
230   // Look at all of the successors of this node.  Count the number of nodes that
231   // this node is the sole unscheduled node for.
232   unsigned NumNodesBlocking = 0;
233   for (const SDep &Succ : SU->Succs)
234     if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
235       ++NumNodesBlocking;
236 
237   NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
238   Queue.push_back(SU);
239 }
240 
241 /// Check if scheduling of this SU is possible
242 /// in the current packet.
243 bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
244   if (!SU || !SU->getNode())
245     return false;
246 
247   // If this is a compound instruction,
248   // it is likely to be a call. Do not delay it.
249   if (SU->getNode()->getGluedNode())
250     return true;
251 
252   // First see if the pipeline could receive this instruction
253   // in the current cycle.
254   if (SU->getNode()->isMachineOpcode())
255     switch (SU->getNode()->getMachineOpcode()) {
256     default:
257       if (!ResourcesModel->canReserveResources(&TII->get(
258           SU->getNode()->getMachineOpcode())))
259            return false;
260       break;
261     case TargetOpcode::EXTRACT_SUBREG:
262     case TargetOpcode::INSERT_SUBREG:
263     case TargetOpcode::SUBREG_TO_REG:
264     case TargetOpcode::REG_SEQUENCE:
265     case TargetOpcode::IMPLICIT_DEF:
266         break;
267     }
268 
269   // Now see if there are no other dependencies
270   // to instructions already in the packet.
271   for (unsigned i = 0, e = Packet.size(); i != e; ++i)
272     for (const SDep &Succ : Packet[i]->Succs) {
273       // Since we do not add pseudos to packets, might as well
274       // ignore order deps.
275       if (Succ.isCtrl())
276         continue;
277 
278       if (Succ.getSUnit() == SU)
279         return false;
280     }
281 
282   return true;
283 }
284 
285 /// Keep track of available resources.
286 void ResourcePriorityQueue::reserveResources(SUnit *SU) {
287   // If this SU does not fit in the packet
288   // start a new one.
289   if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
290     ResourcesModel->clearResources();
291     Packet.clear();
292   }
293 
294   if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
295     switch (SU->getNode()->getMachineOpcode()) {
296     default:
297       ResourcesModel->reserveResources(&TII->get(
298         SU->getNode()->getMachineOpcode()));
299       break;
300     case TargetOpcode::EXTRACT_SUBREG:
301     case TargetOpcode::INSERT_SUBREG:
302     case TargetOpcode::SUBREG_TO_REG:
303     case TargetOpcode::REG_SEQUENCE:
304     case TargetOpcode::IMPLICIT_DEF:
305       break;
306     }
307     Packet.push_back(SU);
308   }
309   // Forcefully end packet for PseudoOps.
310   else {
311     ResourcesModel->clearResources();
312     Packet.clear();
313   }
314 
315   // If packet is now full, reset the state so in the next cycle
316   // we start fresh.
317   if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
318     ResourcesModel->clearResources();
319     Packet.clear();
320   }
321 }
322 
323 int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
324   int RegBalance = 0;
325 
326   if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
327     return RegBalance;
328 
329   // Gen estimate.
330   for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
331       MVT VT = SU->getNode()->getSimpleValueType(i);
332       if (TLI->isTypeLegal(VT)
333           && TLI->getRegClassFor(VT)
334           && TLI->getRegClassFor(VT)->getID() == RCId)
335         RegBalance += numberRCValSuccInSU(SU, RCId);
336   }
337   // Kill estimate.
338   for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
339       const SDValue &Op = SU->getNode()->getOperand(i);
340       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
341       if (isa<ConstantSDNode>(Op.getNode()))
342         continue;
343 
344       if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
345           && TLI->getRegClassFor(VT)->getID() == RCId)
346         RegBalance -= numberRCValPredInSU(SU, RCId);
347   }
348   return RegBalance;
349 }
350 
351 /// Estimates change in reg pressure from this SU.
352 /// It is achieved by trivial tracking of defined
353 /// and used vregs in dependent instructions.
354 /// The RawPressure flag makes this function to ignore
355 /// existing reg file sizes, and report raw def/use
356 /// balance.
357 int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
358   int RegBalance = 0;
359 
360   if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
361     return RegBalance;
362 
363   if (RawPressure) {
364     for (const TargetRegisterClass *RC : TRI->regclasses())
365       RegBalance += rawRegPressureDelta(SU, RC->getID());
366   }
367   else {
368     for (const TargetRegisterClass *RC : TRI->regclasses()) {
369       if ((RegPressure[RC->getID()] +
370            rawRegPressureDelta(SU, RC->getID()) > 0) &&
371           (RegPressure[RC->getID()] +
372            rawRegPressureDelta(SU, RC->getID())  >= RegLimit[RC->getID()]))
373         RegBalance += rawRegPressureDelta(SU, RC->getID());
374     }
375   }
376 
377   return RegBalance;
378 }
379 
// Weights that set the relative importance of the individual heuristic
// components in the scheduling cost computation.
static constexpr unsigned PriorityOne = 200;
static constexpr unsigned PriorityTwo = 50;
static constexpr unsigned PriorityThree = 15;
static constexpr unsigned PriorityFour = 5;
static constexpr unsigned ScaleOne = 20;
static constexpr unsigned ScaleTwo = 10;
static constexpr unsigned ScaleThree = 5;
static constexpr unsigned FactorOne = 2;
390 
391 /// Returns single number reflecting benefit of scheduling SU
392 /// in the current cycle.
393 int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
394   // Initial trivial priority.
395   int ResCount = 1;
396 
397   // Do not waste time on a node that is already scheduled.
398   if (SU->isScheduled)
399     return ResCount;
400 
401   // Forced priority is high.
402   if (SU->isScheduleHigh)
403     ResCount += PriorityOne;
404 
405   // Adaptable scheduling
406   // A small, but very parallel
407   // region, where reg pressure is an issue.
408   if (HorizontalVerticalBalance > RegPressureThreshold) {
409     // Critical path first
410     ResCount += (SU->getHeight() * ScaleTwo);
411     // If resources are available for it, multiply the
412     // chance of scheduling.
413     if (isResourceAvailable(SU))
414       ResCount <<= FactorOne;
415 
416     // Consider change to reg pressure from scheduling
417     // this SU.
418     ResCount -= (regPressureDelta(SU,true) * ScaleOne);
419   }
420   // Default heuristic, greeady and
421   // critical path driven.
422   else {
423     // Critical path first.
424     ResCount += (SU->getHeight() * ScaleTwo);
425     // Now see how many instructions is blocked by this SU.
426     ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
427     // If resources are available for it, multiply the
428     // chance of scheduling.
429     if (isResourceAvailable(SU))
430       ResCount <<= FactorOne;
431 
432     ResCount -= (regPressureDelta(SU) * ScaleTwo);
433   }
434 
435   // These are platform-specific things.
436   // Will need to go into the back end
437   // and accessed from here via a hook.
438   for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
439     if (N->isMachineOpcode()) {
440       const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
441       if (TID.isCall())
442         ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
443     }
444     else
445       switch (N->getOpcode()) {
446       default:  break;
447       case ISD::TokenFactor:
448       case ISD::CopyFromReg:
449       case ISD::CopyToReg:
450         ResCount += PriorityFour;
451         break;
452 
453       case ISD::INLINEASM:
454       case ISD::INLINEASM_BR:
455         ResCount += PriorityThree;
456         break;
457       }
458   }
459   return ResCount;
460 }
461 
462 
463 /// Main resource tracking point.
464 void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
465   // Use NULL entry as an event marker to reset
466   // the DFA state.
467   if (!SU) {
468     ResourcesModel->clearResources();
469     Packet.clear();
470     return;
471   }
472 
473   const SDNode *ScegN = SU->getNode();
474   // Update reg pressure tracking.
475   // First update current node.
476   if (ScegN->isMachineOpcode()) {
477     // Estimate generated regs.
478     for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
479       MVT VT = ScegN->getSimpleValueType(i);
480 
481       if (TLI->isTypeLegal(VT)) {
482         const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
483         if (RC)
484           RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
485       }
486     }
487     // Estimate killed regs.
488     for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
489       const SDValue &Op = ScegN->getOperand(i);
490       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
491 
492       if (TLI->isTypeLegal(VT)) {
493         const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
494         if (RC) {
495           if (RegPressure[RC->getID()] >
496             (numberRCValPredInSU(SU, RC->getID())))
497             RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
498           else RegPressure[RC->getID()] = 0;
499         }
500       }
501     }
502     for (SDep &Pred : SU->Preds) {
503       if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
504         continue;
505       --Pred.getSUnit()->NumRegDefsLeft;
506     }
507   }
508 
509   // Reserve resources for this SU.
510   reserveResources(SU);
511 
512   // Adjust number of parallel live ranges.
513   // Heuristic is simple - node with no data successors reduces
514   // number of live ranges. All others, increase it.
515   unsigned NumberNonControlDeps = 0;
516 
517   for (const SDep &Succ : SU->Succs) {
518     adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
519     if (!Succ.isCtrl())
520       NumberNonControlDeps++;
521   }
522 
523   if (!NumberNonControlDeps) {
524     if (ParallelLiveRanges >= SU->NumPreds)
525       ParallelLiveRanges -= SU->NumPreds;
526     else
527       ParallelLiveRanges = 0;
528 
529   }
530   else
531     ParallelLiveRanges += SU->NumRegDefsLeft;
532 
533   // Track parallel live chains.
534   HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
535   HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
536 }
537 
538 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
539   unsigned  NodeNumDefs = 0;
540   for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
541     if (N->isMachineOpcode()) {
542       const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
543       // No register need be allocated for this.
544       if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
545         NodeNumDefs = 0;
546         break;
547       }
548       NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
549     }
550     else
551       switch(N->getOpcode()) {
552         default:     break;
553         case ISD::CopyFromReg:
554           NodeNumDefs++;
555           break;
556         case ISD::INLINEASM:
557         case ISD::INLINEASM_BR:
558           NodeNumDefs++;
559           break;
560       }
561 
562   SU->NumRegDefsLeft = NodeNumDefs;
563 }
564 
565 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
566 /// scheduled.  If SU is not itself available, then there is at least one
567 /// predecessor node that has not been scheduled yet.  If SU has exactly ONE
568 /// unscheduled predecessor, we want to increase its priority: it getting
569 /// scheduled will make this node available, so it is better than some other
570 /// node of the same priority that will not make a node available.
571 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
572   if (SU->isAvailable) return;  // All preds scheduled.
573 
574   SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
575   if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
576     return;
577 
578   // Okay, we found a single predecessor that is available, but not scheduled.
579   // Since it is available, it must be in the priority queue.  First remove it.
580   remove(OnlyAvailablePred);
581 
582   // Reinsert the node into the priority queue, which recomputes its
583   // NumNodesSolelyBlocking value.
584   push(OnlyAvailablePred);
585 }
586 
587 
588 /// Main access point - returns next instructions
589 /// to be placed in scheduling sequence.
590 SUnit *ResourcePriorityQueue::pop() {
591   if (empty())
592     return nullptr;
593 
594   std::vector<SUnit *>::iterator Best = Queue.begin();
595   if (!DisableDFASched) {
596     int BestCost = SUSchedulingCost(*Best);
597     for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
598 
599       if (SUSchedulingCost(*I) > BestCost) {
600         BestCost = SUSchedulingCost(*I);
601         Best = I;
602       }
603     }
604   }
605   // Use default TD scheduling mechanism.
606   else {
607     for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
608       if (Picker(*Best, *I))
609         Best = I;
610   }
611 
612   SUnit *V = *Best;
613   if (Best != std::prev(Queue.end()))
614     std::swap(*Best, Queue.back());
615 
616   Queue.pop_back();
617 
618   return V;
619 }
620 
621 
622 void ResourcePriorityQueue::remove(SUnit *SU) {
623   assert(!Queue.empty() && "Queue is empty!");
624   std::vector<SUnit *>::iterator I = find(Queue, SU);
625   if (I != std::prev(Queue.end()))
626     std::swap(*I, Queue.back());
627 
628   Queue.pop_back();
629 }
630