//===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides an interface for customizing the standard MachineScheduler
// pass. Note that the entire pass may be replaced as follows:
//
// <Target>TargetMachine::createPassConfig(PassManagerBase &PM) {
//   PM.substitutePass(&MachineSchedulerID, &CustomSchedulerPassID);
//   ...}
//
// The MachineScheduler pass is only responsible for choosing the regions to be
// scheduled. Targets can override the DAG builder and scheduler without
// replacing the pass as follows:
//
// ScheduleDAGInstrs *<Target>PassConfig::
// createMachineScheduler(MachineSchedContext *C) {
//   return new CustomMachineScheduler(C);
// }
//
// The default scheduler, ScheduleDAGMILive, builds the DAG and drives list
// scheduling while updating the instruction stream, register pressure, and live
// intervals. Most targets don't need to override the DAG builder and list
// scheduler, but subtargets that require custom scheduling heuristics may
// plug in an alternate MachineSchedStrategy. The strategy is responsible for
// selecting the highest priority node from the list:
//
// ScheduleDAGInstrs *<Target>PassConfig::
// createMachineScheduler(MachineSchedContext *C) {
//   return new ScheduleDAGMILive(C, std::make_unique<CustomStrategy>(C));
// }
//
// The DAG builder can also be customized by adding DAG mutations that will run
// after DAG building and before list scheduling. DAG mutations can adjust
// dependencies based on target-specific knowledge or add weak edges to aid
// heuristics:
//
// ScheduleDAGInstrs *<Target>PassConfig::
// createMachineScheduler(MachineSchedContext *C) {
//   ScheduleDAGMI *DAG = createGenericSchedLive(C);
//   DAG->addMutation(std::make_unique<CustomDAGMutation>(...));
//   return DAG;
// }
//
// A target that supports alternative schedulers can use the
// MachineSchedRegistry to allow command line selection. This can be done by
// implementing the following boilerplate:
//
// static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) {
//   return new CustomMachineScheduler(C);
// }
// static MachineSchedRegistry
// SchedCustomRegistry("custom", "Run my target's custom scheduler",
//                     createCustomMachineSched);
//
// Finally, subtargets that don't need to implement custom heuristics but would
// like to configure the GenericScheduler's policy for a given scheduler region,
// including scheduling direction and register pressure tracking policy, can do
// this:
//
// void <SubTarget>Subtarget::
// overrideSchedPolicy(MachineSchedPolicy &Policy,
//                     unsigned NumRegionInstrs) const {
//   Policy.<Flag> = true;
// }
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_MACHINESCHEDULER_H
#define LLVM_CODEGEN_MACHINESCHEDULER_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <functional>
#include <list>
#include <map>
#include <memory>
#include <string>
#include <vector>

namespace llvm {

extern cl::opt<bool> ForceTopDown;
extern cl::opt<bool> ForceBottomUp;
extern cl::opt<bool> VerifyScheduling;
#ifndef NDEBUG
extern cl::opt<bool> ViewMISchedDAGs;
extern cl::opt<bool> PrintDAGs;
#else
extern const bool ViewMISchedDAGs;
extern const bool PrintDAGs;
#endif

class AAResults;
class LiveIntervals;
class MachineDominatorTree;
class MachineFunction;
class MachineInstr;
class MachineLoopInfo;
class RegisterClassInfo;
class SchedDFSResult;
class ScheduleHazardRecognizer;
class TargetInstrInfo;
class TargetPassConfig;
class TargetRegisterInfo;

/// MachineSchedContext provides enough context from the MachineScheduler pass
/// for the target to instantiate a scheduler.
struct MachineSchedContext {
  MachineFunction *MF = nullptr;
  const MachineLoopInfo *MLI = nullptr;
  const MachineDominatorTree *MDT = nullptr;
  const TargetPassConfig *PassConfig = nullptr;
  AAResults *AA = nullptr;
  LiveIntervals *LIS = nullptr;

  RegisterClassInfo *RegClassInfo;

  MachineSchedContext();
  MachineSchedContext &operator=(const MachineSchedContext &other) = delete;
  MachineSchedContext(const MachineSchedContext &other) = delete;
  virtual ~MachineSchedContext();
};

/// MachineSchedRegistry provides a selection of available machine instruction
/// schedulers.
class MachineSchedRegistry
    : public MachinePassRegistryNode<
          ScheduleDAGInstrs *(*)(MachineSchedContext *)> {
public:
  using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *);

  // RegisterPassParser requires a (misnamed) FunctionPassCtor type.
  using FunctionPassCtor = ScheduleDAGCtor;

  static MachinePassRegistry<ScheduleDAGCtor> Registry;

  MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
      : MachinePassRegistryNode(N, D, C) {
    Registry.Add(this);
  }

  ~MachineSchedRegistry() { Registry.Remove(this); }

  // Accessors.
  //
  MachineSchedRegistry *getNext() const {
    return (MachineSchedRegistry *)MachinePassRegistryNode::getNext();
  }

  static MachineSchedRegistry *getList() {
    return (MachineSchedRegistry *)Registry.getList();
  }

  static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) {
    Registry.setListener(L);
  }
};

class ScheduleDAGMI;

/// Define a generic scheduling policy for targets that don't provide their own
/// MachineSchedStrategy. This can be overridden for each scheduling region
/// before building the DAG.
struct MachineSchedPolicy {
  // Allow the scheduler to disable register pressure tracking.
  bool ShouldTrackPressure = false;
  /// Track LaneMasks to allow reordering of independent subregister writes
  /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks()
  bool ShouldTrackLaneMasks = false;

  // Allow the scheduler to force top-down or bottom-up scheduling. If neither
  // is true, the scheduler runs in both directions and converges.
  bool OnlyTopDown = false;
  bool OnlyBottomUp = false;

  // Disable the heuristic that tries to schedule nodes from long dependency
  // chains first.
  bool DisableLatencyHeuristic = false;

  // Compute DFSResult for use in scheduling heuristics.
  bool ComputeDFSResult = false;

  MachineSchedPolicy() = default;
};
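
// For example, following the overrideSchedPolicy hook shown in the file
// header, a subtarget might force bottom-up scheduling and disable the
// latency heuristic for small regions (a sketch; the subtarget name and
// threshold are illustrative):
//
// void ExampleSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
//                                            unsigned NumRegionInstrs) const {
//   if (NumRegionInstrs < 16) {
//     Policy.OnlyBottomUp = true;
//     Policy.DisableLatencyHeuristic = true;
//   }
// }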

/// MachineSchedStrategy - Interface to the scheduling algorithm used by
/// ScheduleDAGMI.
///
/// Initialization sequence:
///   initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots
class MachineSchedStrategy {
  virtual void anchor();

public:
  virtual ~MachineSchedStrategy() = default;

  /// Optionally override the per-region scheduling policy.
  virtual void initPolicy(MachineBasicBlock::iterator Begin,
                          MachineBasicBlock::iterator End,
                          unsigned NumRegionInstrs) {}

  virtual void dumpPolicy() const {}

  /// Check if pressure tracking is needed before building the DAG and
  /// initializing this strategy. Called after initPolicy.
  virtual bool shouldTrackPressure() const { return true; }

  /// Returns true if lanemasks should be tracked. LaneMask tracking is
  /// necessary to reorder independent subregister defs for the same vreg.
  /// This has to be enabled in combination with shouldTrackPressure().
  virtual bool shouldTrackLaneMasks() const { return false; }

  // If this method returns true, handling of the scheduling regions
  // themselves (in case of a scheduling boundary in MBB) will be done
  // beginning with the topmost region of MBB.
  virtual bool doMBBSchedRegionsTopDown() const { return false; }

  /// Initialize the strategy after building the DAG for a new region.
  virtual void initialize(ScheduleDAGMI *DAG) = 0;

  /// Tell the strategy that MBB is about to be processed.
  virtual void enterMBB(MachineBasicBlock *MBB) {}

  /// Tell the strategy that the current MBB is done.
  virtual void leaveMBB() {}

  /// Notify this strategy that all roots have been released (including those
  /// that depend on EntrySU or ExitSU).
  virtual void registerRoots() {}

  /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
  /// schedule the node at the top of the unscheduled region. Otherwise it will
  /// be scheduled at the bottom.
  virtual SUnit *pickNode(bool &IsTopNode) = 0;

  /// Scheduler callback to notify that a new subtree is scheduled.
  virtual void scheduleTree(unsigned SubtreeID) {}

  /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an
  /// instruction and updated scheduled/remaining flags in the DAG nodes.
  virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;

  /// When all predecessor dependencies have been resolved, free this node for
  /// top-down scheduling.
  virtual void releaseTopNode(SUnit *SU) = 0;

  /// When all successor dependencies have been resolved, free this node for
  /// bottom-up scheduling.
  virtual void releaseBottomNode(SUnit *SU) = 0;
};
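
// A minimal sketch of a custom strategy (the name is illustrative): schedule
// everything top-down in reverse ready order, with no heuristics. Pressure
// tracking is left at its defaults.
//
// struct ExampleStrategy : public MachineSchedStrategy {
//   std::vector<SUnit *> ReadyQ;
//   void initialize(ScheduleDAGMI *DAG) override { ReadyQ.clear(); }
//   SUnit *pickNode(bool &IsTopNode) override {
//     IsTopNode = true;                  // Always schedule at the top.
//     if (ReadyQ.empty())
//       return nullptr;                  // Region is fully scheduled.
//     SUnit *SU = ReadyQ.back();
//     ReadyQ.pop_back();
//     return SU;
//   }
//   void schedNode(SUnit *SU, bool IsTopNode) override {}
//   void releaseTopNode(SUnit *SU) override { ReadyQ.push_back(SU); }
//   void releaseBottomNode(SUnit *SU) override {} // Bottom queue unused.
// };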

/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply
/// schedules machine instructions according to the given MachineSchedStrategy
/// without much extra book-keeping. This is the common functionality between
/// PreRA and PostRA MachineScheduler.
class ScheduleDAGMI : public ScheduleDAGInstrs {
protected:
  AAResults *AA;
  LiveIntervals *LIS;
  std::unique_ptr<MachineSchedStrategy> SchedImpl;

  /// Ordered list of DAG postprocessing steps.
  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;

  /// The top of the unscheduled zone.
  MachineBasicBlock::iterator CurrentTop;

  /// The bottom of the unscheduled zone.
  MachineBasicBlock::iterator CurrentBottom;

  /// Record the next node in a scheduled cluster.
  const SUnit *NextClusterPred = nullptr;
  const SUnit *NextClusterSucc = nullptr;

#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  /// The number of instructions scheduled so far. Used to cut off the
  /// scheduler at the point determined by misched-cutoff.
  unsigned NumInstrsScheduled = 0;
#endif

public:
  ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
                bool RemoveKillFlags)
      : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA),
        LIS(C->LIS), SchedImpl(std::move(S)) {}

  // Provide a vtable anchor.
  ~ScheduleDAGMI() override;

  /// If this method returns true, handling of the scheduling regions
  /// themselves (in case of a scheduling boundary in MBB) will be done
  /// beginning with the topmost region of MBB.
  bool doMBBSchedRegionsTopDown() const override {
    return SchedImpl->doMBBSchedRegionsTopDown();
  }

  // Returns the LiveIntervals instance for use in DAG mutators and such.
  LiveIntervals *getLIS() const { return LIS; }

  /// Return true if this DAG supports VReg liveness and RegPressure.
  virtual bool hasVRegLiveness() const { return false; }

  /// Add a postprocessing step to the DAG builder.
  /// Mutations are applied in the order that they are added after normal DAG
  /// building and before MachineSchedStrategy initialization.
  ///
  /// ScheduleDAGMI takes ownership of the Mutation object.
  void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
    if (Mutation)
      Mutations.push_back(std::move(Mutation));
  }

  MachineBasicBlock::iterator top() const { return CurrentTop; }
  MachineBasicBlock::iterator bottom() const { return CurrentBottom; }

  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
  /// region. This covers all instructions in a block, while schedule() may only
  /// cover a subset.
  void enterRegion(MachineBasicBlock *bb,
                   MachineBasicBlock::iterator begin,
                   MachineBasicBlock::iterator end,
                   unsigned regioninstrs) override;

  /// Implement the ScheduleDAGInstrs interface for scheduling a sequence of
  /// reorderable instructions.
  void schedule() override;

  void startBlock(MachineBasicBlock *bb) override;
  void finishBlock() override;

  /// Change the position of an instruction within the basic block and update
  /// live ranges and region boundary iterators.
  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);

  const SUnit *getNextClusterPred() const { return NextClusterPred; }

  const SUnit *getNextClusterSucc() const { return NextClusterSucc; }

  void viewGraph(const Twine &Name, const Twine &Title) override;
  void viewGraph() override;

protected:
  // Top-level entry points for the schedule() driver...

  /// Apply each ScheduleDAGMutation step in order. This allows different
  /// instances of ScheduleDAGMI to perform custom DAG postprocessing.
  void postProcessDAG();

  /// Release ExitSU predecessors and set up the scheduler queues.
  void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);

  /// Update the scheduler DAG and queues after scheduling an instruction.
  void updateQueues(SUnit *SU, bool IsTopNode);

  /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
  void placeDebugValues();

  /// Dump the scheduled sequence.
  void dumpSchedule() const;
  /// Print the execution trace of the schedule top-down or bottom-up.
  void dumpScheduleTraceTopDown() const;
  void dumpScheduleTraceBottomUp() const;

  // Lesser helpers...
  bool checkSchedLimit();

  void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                             SmallVectorImpl<SUnit*> &BotRoots);

  void releaseSucc(SUnit *SU, SDep *SuccEdge);
  void releaseSuccessors(SUnit *SU);
  void releasePred(SUnit *SU, SDep *PredEdge);
  void releasePredecessors(SUnit *SU);
};
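
// A sketch of a DAG mutation (the class name and the edge chosen are
// illustrative, not a real LLVM mutation): postprocessing steps subclass
// ScheduleDAGMutation and adjust edges in apply(), which runs after DAG
// building and before strategy initialization.
//
// class ExampleClusterMutation : public ScheduleDAGMutation {
//   void apply(ScheduleDAGInstrs *DAG) override {
//     if (DAG->SUnits.size() < 2)
//       return;
//     // Hint that the first two nodes should be scheduled back to back by
//     // adding a weak cluster edge between them.
//     SUnit &A = DAG->SUnits[0], &B = DAG->SUnits[1];
//     DAG->addEdge(&B, SDep(&A, SDep::Cluster));
//   }
// };
//
// // Installed from createMachineScheduler:
// DAG->addMutation(std::make_unique<ExampleClusterMutation>());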

/// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules
/// machine instructions while updating LiveIntervals and tracking regpressure.
class ScheduleDAGMILive : public ScheduleDAGMI {
protected:
  RegisterClassInfo *RegClassInfo;

  /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees
  /// will be empty.
  SchedDFSResult *DFSResult = nullptr;
  BitVector ScheduledTrees;

  MachineBasicBlock::iterator LiveRegionEnd;

  /// Maps vregs to the SUnits of their uses in the current scheduling region.
  VReg2SUnitMultiMap VRegUses;

  // Map each SU to its summary of pressure changes. This array is updated for
  // liveness during bottom-up scheduling. Top-down scheduling may proceed but
  // has no effect on the pressure diffs.
  PressureDiffs SUPressureDiffs;

  /// Register pressure in this region computed by initRegPressure.
  bool ShouldTrackPressure = false;
  bool ShouldTrackLaneMasks = false;
  IntervalPressure RegPressure;
  RegPressureTracker RPTracker;

  /// List of pressure sets that exceed the target's pressure limit before
  /// scheduling, listed in increasing set ID order. Each pressure set is paired
  /// with its max pressure in the currently scheduled regions.
  std::vector<PressureChange> RegionCriticalPSets;

  /// The top of the unscheduled zone.
  IntervalPressure TopPressure;
  RegPressureTracker TopRPTracker;

  /// The bottom of the unscheduled zone.
  IntervalPressure BotPressure;
  RegPressureTracker BotRPTracker;

public:
  ScheduleDAGMILive(MachineSchedContext *C,
                    std::unique_ptr<MachineSchedStrategy> S)
      : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false),
        RegClassInfo(C->RegClassInfo), RPTracker(RegPressure),
        TopRPTracker(TopPressure), BotRPTracker(BotPressure) {}

  ~ScheduleDAGMILive() override;

  /// Return true if this DAG supports VReg liveness and RegPressure.
  bool hasVRegLiveness() const override { return true; }

  /// Return true if register pressure tracking is enabled.
  bool isTrackingPressure() const { return ShouldTrackPressure; }

  /// Get current register pressure for the top scheduled instructions.
  const IntervalPressure &getTopPressure() const { return TopPressure; }
  const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }

  /// Get current register pressure for the bottom scheduled instructions.
  const IntervalPressure &getBotPressure() const { return BotPressure; }
  const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }

  /// Get register pressure for the entire scheduling region before scheduling.
  const IntervalPressure &getRegPressure() const { return RegPressure; }

  const std::vector<PressureChange> &getRegionCriticalPSets() const {
    return RegionCriticalPSets;
  }

  PressureDiff &getPressureDiff(const SUnit *SU) {
    return SUPressureDiffs[SU->NodeNum];
  }
  const PressureDiff &getPressureDiff(const SUnit *SU) const {
    return SUPressureDiffs[SU->NodeNum];
  }

  /// Compute a DFSResult after DAG building is complete, and before any
  /// queue comparisons.
  void computeDFSResult();

  /// Return a non-null DFS result if the scheduling strategy initialized it.
  const SchedDFSResult *getDFSResult() const { return DFSResult; }

  BitVector &getScheduledTrees() { return ScheduledTrees; }

  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
  /// region. This covers all instructions in a block, while schedule() may only
  /// cover a subset.
  void enterRegion(MachineBasicBlock *bb,
                   MachineBasicBlock::iterator begin,
                   MachineBasicBlock::iterator end,
                   unsigned regioninstrs) override;

  /// Implement the ScheduleDAGInstrs interface for scheduling a sequence of
  /// reorderable instructions.
  void schedule() override;

  /// Compute the cyclic critical path through the DAG.
  unsigned computeCyclicCriticalPath();

  void dump() const override;

protected:
  // Top-level entry points for the schedule() driver...

  /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
  /// enabled. This sets up three trackers. RPTracker will cover the entire DAG
  /// region, TopTracker and BottomTracker will be initialized to the top and
  /// bottom of the DAG region without covering any unscheduled instruction.
  void buildDAGWithRegPressure();

  /// Release ExitSU predecessors and set up the scheduler queues. Re-position
  /// the Top RP tracker in case the region beginning has changed.
  void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);

  /// Move an instruction and update register pressure.
  void scheduleMI(SUnit *SU, bool IsTopNode);

  // Lesser helpers...

  void initRegPressure();

  void updatePressureDiffs(ArrayRef<RegisterMaskPair> LiveUses);

  void updateScheduledPressure(const SUnit *SU,
                               const std::vector<unsigned> &NewMaxPressure);

  void collectVRegUses(SUnit &SU);
};
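
// In a strategy's initialize(), pressure information is only available when
// the DAG has vreg liveness; a common pattern (a sketch) is:
//
// void ExampleStrategy::initialize(ScheduleDAGMI *DAG) {
//   if (DAG->hasVRegLiveness()) {
//     auto *LiveDAG = static_cast<ScheduleDAGMILive *>(DAG);
//     const IntervalPressure &RP = LiveDAG->getRegPressure();
//     // Inspect RP.MaxSetPressure, LiveDAG->getRegionCriticalPSets(), etc.
//   }
// }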

//===----------------------------------------------------------------------===//
///
/// Helpers for implementing custom MachineSchedStrategy classes. These take
/// care of the book-keeping associated with list scheduling heuristics.
///
//===----------------------------------------------------------------------===//

/// ReadyQueue encapsulates a vector of "ready" SUnits with basic convenience
/// methods for pushing and removing nodes. ReadyQueues are uniquely identified
/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
///
/// This is a convenience class that may be used by implementations of
/// MachineSchedStrategy.
class ReadyQueue {
  unsigned ID;
  std::string Name;
  std::vector<SUnit*> Queue;

public:
  ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}

  unsigned getID() const { return ID; }

  StringRef getName() const { return Name; }

  // SU is in this queue if its NodeQueueId is a superset of this ID.
  bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }

  bool empty() const { return Queue.empty(); }

  void clear() { Queue.clear(); }

  unsigned size() const { return Queue.size(); }

  using iterator = std::vector<SUnit*>::iterator;

  iterator begin() { return Queue.begin(); }

  iterator end() { return Queue.end(); }

  ArrayRef<SUnit*> elements() { return Queue; }

  iterator find(SUnit *SU) { return llvm::find(Queue, SU); }

  void push(SUnit *SU) {
    Queue.push_back(SU);
    SU->NodeQueueId |= ID;
  }

  iterator remove(iterator I) {
    (*I)->NodeQueueId &= ~ID;
    *I = Queue.back();
    unsigned idx = I - Queue.begin();
    Queue.pop_back();
    return Queue.begin() + idx;
  }

  void dump() const;
};
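
// Typical usage (a sketch): remove() swaps the element with the back of the
// vector, so it does not preserve queue order, and the returned iterator must
// be used to continue iterating. shouldDrop is a hypothetical predicate.
//
// ReadyQueue Q(/*ID=*/1, "ExampleQ");
// Q.push(SU);                 // Sets the ID bit in SU->NodeQueueId.
// for (auto I = Q.begin(); I != Q.end();) {
//   if (shouldDrop(*I))
//     I = Q.remove(I);
//   else
//     ++I;
// }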

/// Summarize the unscheduled region.
struct SchedRemainder {
  // Critical path through the DAG in expected latency.
  unsigned CriticalPath;
  unsigned CyclicCritPath;

  // Scaled count of micro-ops left to schedule.
  unsigned RemIssueCount;

  bool IsAcyclicLatencyLimited;

  // Unscheduled resources
  SmallVector<unsigned, 16> RemainingCounts;

  SchedRemainder() { reset(); }

  void reset() {
    CriticalPath = 0;
    CyclicCritPath = 0;
    RemIssueCount = 0;
    IsAcyclicLatencyLimited = false;
    RemainingCounts.clear();
  }

  void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
};

/// ResourceSegments are a collection of intervals closed on the
/// left and open on the right:
///
///     list{ [a1, b1), [a2, b2), ..., [a_N, b_N) }
///
/// The collection has the following properties:
///
/// 1. The list is ordered: a_i < b_i and b_i < a_(i+1)
///
/// 2. The intervals in the collection do not intersect each other.
///
/// A \ref ResourceSegments instance represents the cycle
/// reservation history of an individual resource instance.
class ResourceSegments {
public:
  /// Represents an interval of discrete integer values closed on
  /// the left and open on the right: [a, b).
  typedef std::pair<int64_t, int64_t> IntervalTy;

  /// Adds an interval [a, b) to the collection of the instance.
  ///
  /// When adding [a, b) to the collection, the operation merges the
  /// adjacent intervals. For example
  ///
  ///       0  1  2  3  4  5  6  7  8  9  10
  ///       [-----)  [--)     [--)
  ///     +       [--)
  ///     = [-----------)     [--)
  ///
  /// To make duplicate resource usage debuggable, the function
  /// asserts that no interval is added if it overlaps any of the
  /// intervals in the collection. We can require this because, by
  /// definition, a \ref ResourceSegments is attached only to an
  /// individual resource instance.
  void add(IntervalTy A, const unsigned CutOff = 10);

public:
  /// Checks whether intervals intersect.
  static bool intersects(IntervalTy A, IntervalTy B);

  /// These functions return the interval used by a resource in bottom and top
  /// scheduling.
  ///
  /// Consider an instruction that uses resources X0, X1 and X2 as follows:
  ///
  /// X0 X1 X1 X2    +--------+--------------+--------------+
  ///                |Resource|AcquireAtCycle|ReleaseAtCycle|
  ///                +--------+--------------+--------------+
  ///                |   X0   |      0       |       1      |
  ///                +--------+--------------+--------------+
  ///                |   X1   |      1       |       3      |
  ///                +--------+--------------+--------------+
  ///                |   X2   |      3       |       4      |
  ///                +--------+--------------+--------------+
  ///
  /// If we can schedule the instruction at cycle C, we need to
  /// compute the interval of the resource as follows:
  ///
  /// # TOP DOWN SCHEDULING
  ///
  /// Cycles scheduling flows to the _right_, in the same direction
  /// of time.
  ///
  ///       C      1      2      3      4      5  ...
  /// ------|------|------|------|------|------|----->
  ///       X0     X1     X1     X2   ---> direction of time
  /// X0    [C, C+1)
  /// X1           [C+1,      C+3)
  /// X2                         [C+3, C+4)
  ///
  /// Therefore, the formula to compute the interval for a resource
  /// of an instruction that can be scheduled at cycle C in top-down
  /// scheduling is:
  ///
  ///       [C+AcquireAtCycle, C+ReleaseAtCycle)
  ///
  ///
  /// # BOTTOM UP SCHEDULING
  ///
  /// Cycles scheduling flows to the _left_, in the opposite direction
  /// of time.
  ///
  /// In bottom up scheduling, the scheduling happens in the opposite
  /// direction to the execution of the cycles of the
  /// instruction. When the instruction is scheduled at cycle `C`,
  /// the resources are allocated in the past relative to `C`:
  ///
  ///       2      1      C     -1     -2     -3     -4     -5  ...
  /// <-----|------|------|------|------|------|------|------|---
  ///                     X0     X1     X1     X2   ---> direction of time
  /// X0           (C+1, C]
  /// X1                  (C,        C-2]
  /// X2                              (C-2, C-3]
  ///
  /// Therefore, the formula to compute the interval for a resource
  /// of an instruction that can be scheduled at cycle C in bottom-up
  /// scheduling is:
  ///
  ///       [C-ReleaseAtCycle+1, C-AcquireAtCycle+1)
  ///
  ///
  /// NOTE: In both cases, the number of cycles booked by a
  /// resource is the value (ReleaseAtCycle - AcquireAtCycle).
  static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle,
                                              unsigned ReleaseAtCycle) {
    return std::make_pair<long, long>((long)C - (long)ReleaseAtCycle + 1L,
                                      (long)C - (long)AcquireAtCycle + 1L);
  }
  static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle,
                                           unsigned ReleaseAtCycle) {
    return std::make_pair<long, long>((long)C + (long)AcquireAtCycle,
                                      (long)C + (long)ReleaseAtCycle);
  }

private:
  /// Finds the first cycle in which a resource can be allocated.
  ///
  /// The function uses the \param IntervalBuilder [*] to build a
  /// resource interval [a, b) out of the input parameters \param
  /// CurrCycle, \param AcquireAtCycle and \param ReleaseAtCycle.
  ///
  /// The function then loops through the intervals in the ResourceSegments
  /// and shifts the interval [a, b) and the ReturnCycle to the
  /// right until there is no intersection between the intervals of
  /// the \ref ResourceSegments instance and the new shifted [a, b). When
  /// this condition is met, the ReturnCycle (which
  /// corresponds to the cycle in which the resource can be
  /// allocated) is returned.
  ///
  ///               c = CurrCycle in input
  ///               c   1   2   3   4   5   6   7   8   9   10 ... ---> (time
  ///               flow)
  ///  ResourceSegments...  [---)   [-------)           [-----------)
  ///               c   [1     3)  -> AcquireAtCycle=1, ReleaseAtCycle=3
  ///                 ++c   [1     3)
  ///                     ++c   [1     3)
  ///                         ++c   [1     3)
  ///                             ++c   [1     3)
  ///                                 ++c   [1     3)    ---> returns c
  ///                                 incremented by 5 (c+5)
  ///
  ///
  /// Notice that for bottom-up scheduling the diagram is slightly
  /// different because the current cycle c is always on the right
  /// of the interval [a, b) (see \ref
  /// `getResourceIntervalBottom`). This is because the cycle
  /// increments for bottom-up scheduling move in the direction
  /// opposite to the direction of time:
  ///
  ///     --------> direction of time.
  ///     XXYZZZ    (resource usage)
  ///     --------> direction of top-down execution cycles.
  ///     <-------- direction of bottom-up execution cycles.
  ///
  /// Even though bottom-up scheduling moves against the flow of
  /// time, the algorithm used to find the first free slot in between
  /// intervals is the same as for top-down scheduling.
  ///
  /// [*] See \ref `getResourceIntervalTop` and
  /// \ref `getResourceIntervalBottom` to see how such resource intervals
  /// are built.
  unsigned getFirstAvailableAt(
      unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
      std::function<IntervalTy(unsigned, unsigned, unsigned)> IntervalBuilder)
      const;

public:
  /// getFirstAvailableAtFromBottom and getFirstAvailableAtFromTop
  /// should be merged into a single function in which a function that
  /// creates the `NewInterval` is passed as a parameter.
  unsigned getFirstAvailableAtFromBottom(unsigned CurrCycle,
                                         unsigned AcquireAtCycle,
                                         unsigned ReleaseAtCycle) const {
    return getFirstAvailableAt(CurrCycle, AcquireAtCycle, ReleaseAtCycle,
                               getResourceIntervalBottom);
  }
  unsigned getFirstAvailableAtFromTop(unsigned CurrCycle,
                                      unsigned AcquireAtCycle,
                                      unsigned ReleaseAtCycle) const {
    return getFirstAvailableAt(CurrCycle, AcquireAtCycle, ReleaseAtCycle,
                               getResourceIntervalTop);
  }

private:
  std::list<IntervalTy> _Intervals;
  /// Merge all adjacent intervals in the collection. For all pairs
  /// of adjacent intervals, it performs [a, b) + [b, c) -> [a, c).
  ///
  /// Before performing the merge operation, the intervals are
  /// sorted with \ref sort_predicate.
  void sortAndMerge();

public:
  // Constructor for an empty set.
  explicit ResourceSegments() {}
  bool empty() const { return _Intervals.empty(); }
  explicit ResourceSegments(const std::list<IntervalTy> &Intervals)
      : _Intervals(Intervals) {
    sortAndMerge();
  }

  friend bool operator==(const ResourceSegments &c1,
                         const ResourceSegments &c2) {
    return c1._Intervals == c2._Intervals;
  }
  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
                                       const ResourceSegments &Segments) {
    os << "{ ";
    for (auto p : Segments._Intervals)
      os << "[" << p.first << ", " << p.second << "), ";
    os << "}\n";
    return os;
  }
};
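
// Plugging concrete numbers into the two formulas above, for C = 10,
// AcquireAtCycle = 1, ReleaseAtCycle = 3:
//
//   getResourceIntervalTop(10, 1, 3)    == [10 + 1, 10 + 3)         == [11, 13)
//   getResourceIntervalBottom(10, 1, 3) == [10 - 3 + 1, 10 - 1 + 1) == [8, 10)
//
// Both intervals book ReleaseAtCycle - AcquireAtCycle = 2 cycles of the
// resource instance, as the NOTE above states.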

/// Each scheduling boundary is associated with ready queues. It tracks the
/// current cycle in the direction of movement, and maintains the state
/// of "hazards" and other interlocks at the current cycle.
class SchedBoundary {
public:
  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
  enum {
    TopQID = 1,
    BotQID = 2,
    LogMaxQID = 2
  };

  ScheduleDAGMI *DAG = nullptr;
  const TargetSchedModel *SchedModel = nullptr;
  SchedRemainder *Rem = nullptr;

  ReadyQueue Available;
  ReadyQueue Pending;

  ScheduleHazardRecognizer *HazardRec = nullptr;

private:
  /// True if the pending Q should be checked/updated before scheduling another
  /// instruction.
  bool CheckPending;

  /// Number of cycles it takes to issue the instructions scheduled in this
  /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
  /// See getStalls().
  unsigned CurrCycle;

  /// Micro-ops issued in the current cycle.
  unsigned CurrMOps;

  /// MinReadyCycle - Cycle of the soonest available instruction.
  unsigned MinReadyCycle;

  // The expected latency of the critical path in this scheduled zone.
  unsigned ExpectedLatency;

  // The latency of dependence chains leading into this zone.
  // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
  // For each cycle scheduled: DLat -= 1.
  unsigned DependentLatency;

  /// Count the scheduled (issued) micro-ops that can be retired by
  /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
  unsigned RetiredMOps;

  // Count scheduled resources that have been executed. Resources are
  // considered executed if they become ready in the time that it takes to
  // saturate any resource including the one in question. Counts are scaled
  // for direct comparison with other resources. Counts can be compared with
  // MOps * getMicroOpFactor and Latency * getLatencyFactor.
  SmallVector<unsigned, 16> ExecutedResCounts;

  /// Cache the max count for a single resource.
  unsigned MaxExecutedResCount;

  // Cache the critical resource's ID in this scheduled zone.
  unsigned ZoneCritResIdx;

  // Is the scheduled region resource limited vs. latency limited.
  bool IsResourceLimited;

  /// Record how resources have been allocated across the cycles of
  /// the execution.
  std::map<unsigned, ResourceSegments> ReservedResourceSegments;
  std::vector<unsigned> ReservedCycles;
  /// For each PIdx, stores the first index into ReservedResourceSegments that
  /// corresponds to it.
  ///
  /// For example, consider the following 3 resources (ResourceCount =
  /// 3):
  ///
  ///   +------------+--------+
  ///   |ResourceName|NumUnits|
  ///   +------------+--------+
  ///   |     X      |    2   |
  ///   +------------+--------+
  ///   |     Y      |    3   |
  ///   +------------+--------+
  ///   |     Z      |    1   |
  ///   +------------+--------+
  ///
  /// In this case, the total number of resource instances is 6. The
  /// vector \ref ReservedResourceSegments will have a slot for each instance.
  /// The vector \ref ReservedCyclesIndex will track at what index the first
  /// instance of the resource is found in the vector of \ref
  /// ReservedResourceSegments:
  ///
  ///                              Indexes of instances in
  ///                              ReservedResourceSegments
  ///
  ///                              0   1   2   3   4  5
  /// ReservedCyclesIndex[0] = 0; [X0, X1,
  /// ReservedCyclesIndex[1] = 2;          Y0, Y1, Y2
  /// ReservedCyclesIndex[2] = 5;                     Z
  SmallVector<unsigned, 16> ReservedCyclesIndex;

  // For each PIdx, stores the resource group IDs of its subunits.
  SmallVector<APInt, 16> ResourceGroupSubUnitMasks;

#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  // Remember the greatest possible stall as an upper bound on the number of
  // times we should retry the pending queue because of a hazard.
  unsigned MaxObservedStall;
#endif

public:
  /// Pending queues extend the ready queues with the same ID and the
  /// PendingFlag set.
  SchedBoundary(unsigned ID, const Twine &Name):
    Available(ID, Name+".A"), Pending(ID << LogMaxQID, Name+".P") {
    reset();
  }
  SchedBoundary &operator=(const SchedBoundary &other) = delete;
  SchedBoundary(const SchedBoundary &other) = delete;
  ~SchedBoundary();

  void reset();

  void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
            SchedRemainder *rem);

  bool isTop() const {
    return Available.getID() == TopQID;
  }

  /// Number of cycles to issue the instructions scheduled in this zone.
  unsigned getCurrCycle() const { return CurrCycle; }

  /// Micro-ops issued in the current cycle.
  unsigned getCurrMOps() const { return CurrMOps; }

  // The latency of dependence chains leading into this zone.
  unsigned getDependentLatency() const { return DependentLatency; }

  /// Get the number of latency cycles "covered" by the scheduled
  /// instructions. This is the larger of the critical path within the zone
  /// and the number of cycles required to issue the instructions.
  unsigned getScheduledLatency() const {
    return std::max(ExpectedLatency, CurrCycle);
  }

  unsigned getUnscheduledLatency(SUnit *SU) const {
    return isTop() ? SU->getHeight() : SU->getDepth();
  }

  unsigned getResourceCount(unsigned ResIdx) const {
    return ExecutedResCounts[ResIdx];
  }

  /// Get the scaled count of scheduled micro-ops and resources, including
  /// executed resources.
  unsigned getCriticalCount() const {
    if (!ZoneCritResIdx)
      return RetiredMOps * SchedModel->getMicroOpFactor();
    return getResourceCount(ZoneCritResIdx);
  }

  /// Get a scaled count for the minimum execution time of the scheduled
  /// micro-ops that are ready to execute by getExecutedCount. Notice the
  /// feedback loop.
  unsigned getExecutedCount() const {
    return std::max(CurrCycle * SchedModel->getLatencyFactor(),
                    MaxExecutedResCount);
  }

  unsigned getZoneCritResIdx() const { return ZoneCritResIdx; }

  // Is the scheduled region resource limited vs. latency limited.
  bool isResourceLimited() const { return IsResourceLimited; }

  /// Get the difference between the given SUnit's ready time and the current
  /// cycle.
  unsigned getLatencyStallCycles(SUnit *SU);

  unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
                                          unsigned ReleaseAtCycle,
                                          unsigned AcquireAtCycle);

  std::pair<unsigned, unsigned> getNextResourceCycle(const MCSchedClassDesc *SC,
                                                     unsigned PIdx,
                                                     unsigned ReleaseAtCycle,
                                                     unsigned AcquireAtCycle);

  bool isUnbufferedGroup(unsigned PIdx) const {
    return SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin &&
           !SchedModel->getProcResource(PIdx)->BufferSize;
  }

  bool checkHazard(SUnit *SU);

  unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);

  unsigned getOtherResourceCount(unsigned &OtherCritIdx);

  /// Release SU to make it ready. If it has no hazard, remove it from the
  /// pending queue (if it is in it) and push it into the available queue.
  /// Otherwise, push the SU into the pending queue.
  ///
  /// @param SU The unit to be released.
  /// @param ReadyCycle The cycle at which the unit becomes ready.
  /// @param InPQueue Whether SU is already in the pending queue.
  /// @param Idx Position offset in the pending queue (if in it).
  void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
                   unsigned Idx = 0);

  void bumpCycle(unsigned NextCycle);

  void incExecutedResources(unsigned PIdx, unsigned Count);

  unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx,
                         unsigned Cycles, unsigned ReadyCycle,
                         unsigned StartAtCycle);

  void bumpNode(SUnit *SU);

  void releasePending();

  void removeReady(SUnit *SU);

  /// Call this before applying any other heuristics to the Available queue.
  /// Updates the Available/Pending Q's if necessary and returns the single
  /// available instruction, or NULL if there are multiple candidates.
  SUnit *pickOnlyChoice();

  /// Dump the state of the information that tracks resource usage.
  void dumpReservedCycles() const;
  void dumpScheduledState() const;
};
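
// Strategies typically consult pickOnlyChoice() before running any candidate
// comparisons; a sketch of the common pattern inside pickNode():
//
// if (SUnit *SU = Top.pickOnlyChoice()) {
//   IsTopNode = true;
//   return SU;   // Single available instruction: no heuristics needed.
// }
// // Otherwise, compare the candidates in Top.Available...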

/// Base class for GenericScheduler. This class maintains information about
/// scheduling candidates based on TargetSchedModel making it easy to implement
/// heuristics for either preRA or postRA scheduling.
class GenericSchedulerBase : public MachineSchedStrategy {
public:
  /// Represent the type of SchedCandidate found within a single queue.
  /// pickNodeBidirectional depends on these listed by decreasing priority.
  enum CandReason : uint8_t {
    NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak,
    RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
    TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};

#ifndef NDEBUG
  static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
#endif

  /// Policy for scheduling the next instruction in the candidate's zone.
  struct CandPolicy {
    bool ReduceLatency = false;
    unsigned ReduceResIdx = 0;
    unsigned DemandResIdx = 0;

    CandPolicy() = default;

    bool operator==(const CandPolicy &RHS) const {
      return ReduceLatency == RHS.ReduceLatency &&
             ReduceResIdx == RHS.ReduceResIdx &&
             DemandResIdx == RHS.DemandResIdx;
    }
    bool operator!=(const CandPolicy &RHS) const {
      return !(*this == RHS);
    }
  };

  /// Status of an instruction's critical resource consumption.
  struct SchedResourceDelta {
    // Count critical resources in the scheduled region required by SU.
    unsigned CritResources = 0;

    // Count critical resources from another region consumed by SU.
    unsigned DemandedResources = 0;

    SchedResourceDelta() = default;

    bool operator==(const SchedResourceDelta &RHS) const {
      return CritResources == RHS.CritResources
        && DemandedResources == RHS.DemandedResources;
    }
    bool operator!=(const SchedResourceDelta &RHS) const {
      return !operator==(RHS);
    }
  };

  /// Store the state used by GenericScheduler heuristics, required for the
  /// lifetime of one invocation of pickNode().
  struct SchedCandidate {
    CandPolicy Policy;

    // The best SUnit candidate.
    SUnit *SU;

    // The reason for this candidate.
    CandReason Reason;

    // Whether this candidate should be scheduled at top/bottom.
    bool AtTop;

    // Register pressure values for the best candidate.
    RegPressureDelta RPDelta;

    // Critical resource consumption of the best candidate.
    SchedResourceDelta ResDelta;

    SchedCandidate() { reset(CandPolicy()); }
    SchedCandidate(const CandPolicy &Policy) { reset(Policy); }

    void reset(const CandPolicy &NewPolicy) {
      Policy = NewPolicy;
      SU = nullptr;
      Reason = NoCand;
      AtTop = false;
      RPDelta = RegPressureDelta();
      ResDelta = SchedResourceDelta();
    }

    bool isValid() const { return SU; }

    // Copy the status of another candidate without changing policy.
    void setBest(SchedCandidate &Best) {
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
      SU = Best.SU;
      Reason = Best.Reason;
      AtTop = Best.AtTop;
      RPDelta = Best.RPDelta;
      ResDelta = Best.ResDelta;
    }

    void initResourceDelta(const ScheduleDAGMI *DAG,
                           const TargetSchedModel *SchedModel);
  };

protected:
  const MachineSchedContext *Context;
  const TargetSchedModel *SchedModel = nullptr;
  const TargetRegisterInfo *TRI = nullptr;

  SchedRemainder Rem;

  GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {}

  void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
                 SchedBoundary *OtherZone);

#ifndef NDEBUG
  void traceCandidate(const SchedCandidate &Cand);
#endif

private:
  bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone,
                           bool ComputeRemLatency, unsigned &RemLatency) const;
};

// Utility functions used by heuristics in tryCandidate().
bool tryLess(int TryVal, int CandVal,
             GenericSchedulerBase::SchedCandidate &TryCand,
             GenericSchedulerBase::SchedCandidate &Cand,
             GenericSchedulerBase::CandReason Reason);
bool tryGreater(int TryVal, int CandVal,
                GenericSchedulerBase::SchedCandidate &TryCand,
                GenericSchedulerBase::SchedCandidate &Cand,
                GenericSchedulerBase::CandReason Reason);
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
                GenericSchedulerBase::SchedCandidate &Cand,
                SchedBoundary &Zone);
bool tryPressure(const PressureChange &TryP,
                 const PressureChange &CandP,
                 GenericSchedulerBase::SchedCandidate &TryCand,
                 GenericSchedulerBase::SchedCandidate &Cand,
                 GenericSchedulerBase::CandReason Reason,
                 const TargetRegisterInfo *TRI,
                 const MachineFunction &MF);
unsigned getWeakLeft(const SUnit *SU, bool isTop);
int biasPhysReg(const SUnit *SU, bool isTop);

/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class GenericScheduler : public GenericSchedulerBase {
public:
  GenericScheduler(const MachineSchedContext *C):
    GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
    Bot(SchedBoundary::BotQID, "BotQ") {}

  void initPolicy(MachineBasicBlock::iterator Begin,
                  MachineBasicBlock::iterator End,
                  unsigned NumRegionInstrs) override;

  void dumpPolicy() const override;

  bool shouldTrackPressure() const override {
    return RegionPolicy.ShouldTrackPressure;
  }

  bool shouldTrackLaneMasks() const override {
    return RegionPolicy.ShouldTrackLaneMasks;
  }

  void initialize(ScheduleDAGMI *dag) override;

  SUnit *pickNode(bool &IsTopNode) override;

  void schedNode(SUnit *SU, bool IsTopNode) override;

  void releaseTopNode(SUnit *SU) override {
    if (SU->isScheduled)
      return;

    Top.releaseNode(SU, SU->TopReadyCycle, false);
    TopCand.SU = nullptr;
  }

  void releaseBottomNode(SUnit *SU) override {
    if (SU->isScheduled)
      return;

    Bot.releaseNode(SU, SU->BotReadyCycle, false);
    BotCand.SU = nullptr;
  }

  void registerRoots() override;

protected:
  ScheduleDAGMILive *DAG = nullptr;

  MachineSchedPolicy RegionPolicy;

  // State of the top and bottom scheduled instruction boundaries.
  SchedBoundary Top;
  SchedBoundary Bot;

  /// Candidate last picked from the Top boundary.
  SchedCandidate TopCand;
  /// Candidate last picked from the Bot boundary.
  SchedCandidate BotCand;

  void checkAcyclicLatency();

  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     RegPressureTracker &TempTracker);

  virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                            SchedBoundary *Zone) const;

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone,
                         const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Candidate);

  void reschedulePhysReg(SUnit *SU, bool isTop);
};
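
// Targets that mostly want the generic heuristics but need an extra
// tie-breaker usually subclass GenericScheduler and wrap tryCandidate
// (a sketch; the class name and the tie-breaker are illustrative):
//
// class ExampleSchedStrategy : public GenericScheduler {
//   using GenericScheduler::GenericScheduler;
//
//   bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
//                     SchedBoundary *Zone) const override {
//     bool Result = GenericScheduler::tryCandidate(Cand, TryCand, Zone);
//     // Apply a target-specific preference here, e.g. via tryGreater(...)
//     // with a custom CandReason, before returning.
//     return Result;
//   }
// };
//
// // Installed via createMachineScheduler:
// //   return new ScheduleDAGMILive(C, std::make_unique<ExampleSchedStrategy>(C));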

/// PostGenericScheduler - Interface to the scheduling algorithm used by
/// ScheduleDAGMI.
///
/// Callbacks from ScheduleDAGMI:
///   initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
class PostGenericScheduler : public GenericSchedulerBase {
protected:
  ScheduleDAGMI *DAG = nullptr;
  SchedBoundary Top;
  SchedBoundary Bot;
  MachineSchedPolicy RegionPolicy;

  /// Candidate last picked from the Top boundary.
  SchedCandidate TopCand;
  /// Candidate last picked from the Bot boundary.
  SchedCandidate BotCand;

public:
  PostGenericScheduler(const MachineSchedContext *C)
      : GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
        Bot(SchedBoundary::BotQID, "BotQ") {}

  ~PostGenericScheduler() override = default;

  void initPolicy(MachineBasicBlock::iterator Begin,
                  MachineBasicBlock::iterator End,
                  unsigned NumRegionInstrs) override;

  /// PostRA scheduling does not track pressure.
  bool shouldTrackPressure() const override { return false; }

  void initialize(ScheduleDAGMI *Dag) override;

  void registerRoots() override;

  SUnit *pickNode(bool &IsTopNode) override;

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void scheduleTree(unsigned SubtreeID) override {
    llvm_unreachable("PostRA scheduler does not support subtree analysis.");
  }

  void schedNode(SUnit *SU, bool IsTopNode) override;

  void releaseTopNode(SUnit *SU) override {
    if (SU->isScheduled)
      return;
    Top.releaseNode(SU, SU->TopReadyCycle, false);
    TopCand.SU = nullptr;
  }

  void releaseBottomNode(SUnit *SU) override {
    if (SU->isScheduled)
      return;
    Bot.releaseNode(SU, SU->BotReadyCycle, false);
    BotCand.SU = nullptr;
  }

protected:
  virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);

  void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand);
};

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
/// Adds default DAG mutations.
ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C);

/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C);

/// If ReorderWhileClustering is set to true, no attempt will be made to
/// reduce reordering due to load clustering.
std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
                             const TargetRegisterInfo *TRI,
                             bool ReorderWhileClustering = false);

/// If ReorderWhileClustering is set to true, no attempt will be made to
/// reduce reordering due to store clustering.
std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
                              const TargetRegisterInfo *TRI,
                              bool ReorderWhileClustering = false);

std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
                               const TargetRegisterInfo *TRI);

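// A target's createMachineScheduler hook commonly combines these factories
// (a sketch; whether to add a cluster mutation is a target choice):
//
// static ScheduleDAGInstrs *createExampleSched(MachineSchedContext *C) {
//   ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//   return DAG;
// }
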
} // end namespace llvm

#endif // LLVM_CODEGEN_MACHINESCHEDULER_H