1 //===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides an interface for customizing the standard MachineScheduler
10 // pass. Note that the entire pass may be replaced as follows:
11 //
12 // <Target>TargetMachine::createPassConfig(PassManagerBase &PM) {
13 //   PM.substitutePass(&MachineSchedulerID, &CustomSchedulerPassID);
14 //   ...}
15 //
16 // The MachineScheduler pass is only responsible for choosing the regions to be
17 // scheduled. Targets can override the DAG builder and scheduler without
18 // replacing the pass as follows:
19 //
20 // ScheduleDAGInstrs *<Target>TargetMachine::
21 // createMachineScheduler(MachineSchedContext *C) {
22 //   return new CustomMachineScheduler(C);
23 // }
24 //
25 // The default scheduler, ScheduleDAGMILive, builds the DAG and drives list
26 // scheduling while updating the instruction stream, register pressure, and live
27 // intervals. Most targets don't need to override the DAG builder and list
28 // scheduler, but subtargets that require custom scheduling heuristics may
29 // plugin an alternate MachineSchedStrategy. The strategy is responsible for
30 // selecting the highest priority node from the list:
31 //
32 // ScheduleDAGInstrs *<Target>TargetMachine::
33 // createMachineScheduler(MachineSchedContext *C) {
34 //   return new ScheduleDAGMILive(C, CustomStrategy(C));
35 // }
36 //
37 // The DAG builder can also be customized in a sense by adding DAG mutations
38 // that will run after DAG building and before list scheduling. DAG mutations
39 // can adjust dependencies based on target-specific knowledge or add weak edges
40 // to aid heuristics:
41 //
42 // ScheduleDAGInstrs *<Target>TargetMachine::
43 // createMachineScheduler(MachineSchedContext *C) {
44 //   ScheduleDAGMI *DAG = createSchedLive(C);
45 //   DAG->addMutation(new CustomDAGMutation(...));
46 //   return DAG;
47 // }
48 //
49 // A target that supports alternative schedulers can use the
50 // MachineSchedRegistry to allow command line selection. This can be done by
51 // implementing the following boilerplate:
52 //
53 // static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) {
54 //  return new CustomMachineScheduler(C);
55 // }
56 // static MachineSchedRegistry
57 // SchedCustomRegistry("custom", "Run my target's custom scheduler",
58 //                     createCustomMachineSched);
59 //
60 //
61 // Finally, subtargets that don't need to implement custom heuristics but would
62 // like to configure the GenericScheduler's policy for a given scheduler region,
63 // including scheduling direction and register pressure tracking policy, can do
64 // this:
65 //
66 // void <SubTarget>Subtarget::
67 // overrideSchedPolicy(MachineSchedPolicy &Policy,
68 //                     unsigned NumRegionInstrs) const {
69 //   Policy.<Flag> = true;
70 // }
71 //
72 //===----------------------------------------------------------------------===//
73 
74 #ifndef LLVM_CODEGEN_MACHINESCHEDULER_H
75 #define LLVM_CODEGEN_MACHINESCHEDULER_H
76 
77 #include "llvm/ADT/APInt.h"
78 #include "llvm/ADT/ArrayRef.h"
79 #include "llvm/ADT/BitVector.h"
80 #include "llvm/ADT/STLExtras.h"
81 #include "llvm/ADT/SmallVector.h"
82 #include "llvm/ADT/StringRef.h"
83 #include "llvm/ADT/Twine.h"
84 #include "llvm/CodeGen/MachineBasicBlock.h"
85 #include "llvm/CodeGen/MachinePassRegistry.h"
86 #include "llvm/CodeGen/RegisterPressure.h"
87 #include "llvm/CodeGen/ScheduleDAG.h"
88 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
89 #include "llvm/CodeGen/ScheduleDAGMutation.h"
90 #include "llvm/CodeGen/TargetSchedule.h"
91 #include "llvm/Support/CommandLine.h"
92 #include "llvm/Support/Compiler.h"
93 #include "llvm/Support/ErrorHandling.h"
94 #include <algorithm>
95 #include <cassert>
96 #include "llvm/Support/raw_ostream.h"
97 #include <memory>
98 #include <string>
99 #include <vector>
100 
101 namespace llvm {
102 namespace impl_detail {
103 // FIXME: Remove these declarations once RegisterClassInfo is queryable as an
104 // analysis.
105 class MachineSchedulerImpl;
106 class PostMachineSchedulerImpl;
107 } // namespace impl_detail
108 
109 namespace MISched {
110 enum Direction {
111   Unspecified,
112   TopDown,
113   BottomUp,
114   Bidirectional,
115 };
116 } // namespace MISched
117 
118 LLVM_ABI extern cl::opt<MISched::Direction> PreRADirection;
119 LLVM_ABI extern cl::opt<bool> VerifyScheduling;
120 #ifndef NDEBUG
121 extern cl::opt<bool> ViewMISchedDAGs;
122 extern cl::opt<bool> PrintDAGs;
123 #else
124 LLVM_ABI extern const bool ViewMISchedDAGs;
125 LLVM_ABI extern const bool PrintDAGs;
126 #endif
127 
128 class AAResults;
129 class LiveIntervals;
130 class MachineDominatorTree;
131 class MachineFunction;
132 class MachineInstr;
133 class MachineLoopInfo;
134 class RegisterClassInfo;
135 class SchedDFSResult;
136 class ScheduleHazardRecognizer;
137 class TargetInstrInfo;
138 class TargetPassConfig;
139 class TargetRegisterInfo;
140 
141 /// MachineSchedContext provides enough context from the MachineScheduler pass
142 /// for the target to instantiate a scheduler.
143 struct LLVM_ABI MachineSchedContext {
144   MachineFunction *MF = nullptr;
145   const MachineLoopInfo *MLI = nullptr;
146   const MachineDominatorTree *MDT = nullptr;
147   const TargetMachine *TM = nullptr;
148   AAResults *AA = nullptr;
149   LiveIntervals *LIS = nullptr;
150 
151   RegisterClassInfo *RegClassInfo;
152 
153   MachineSchedContext();
154   MachineSchedContext &operator=(const MachineSchedContext &other) = delete;
155   MachineSchedContext(const MachineSchedContext &other) = delete;
156   virtual ~MachineSchedContext();
157 };
158 
159 /// MachineSchedRegistry provides a selection of available machine instruction
160 /// schedulers.
161 class MachineSchedRegistry
162     : public MachinePassRegistryNode<
163           ScheduleDAGInstrs *(*)(MachineSchedContext *)> {
164 public:
165   using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *);
166 
167   // RegisterPassParser requires a (misnamed) FunctionPassCtor type.
168   using FunctionPassCtor = ScheduleDAGCtor;
169 
170   LLVM_ABI static MachinePassRegistry<ScheduleDAGCtor> Registry;
171 
172   MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
173       : MachinePassRegistryNode(N, D, C) {
174     Registry.Add(this);
175   }
176 
177   ~MachineSchedRegistry() { Registry.Remove(this); }
178 
179   // Accessors.
180   //
181   MachineSchedRegistry *getNext() const {
182     return (MachineSchedRegistry *)MachinePassRegistryNode::getNext();
183   }
184 
185   static MachineSchedRegistry *getList() {
186     return (MachineSchedRegistry *)Registry.getList();
187   }
188 
189   static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) {
190     Registry.setListener(L);
191   }
192 };
193 
194 class ScheduleDAGMI;
195 
196 /// Define a generic scheduling policy for targets that don't provide their own
197 /// MachineSchedStrategy. This can be overridden for each scheduling region
198 /// before building the DAG.
199 struct MachineSchedPolicy {
200   // Allow the scheduler to disable register pressure tracking.
201   bool ShouldTrackPressure = false;
202   /// Track LaneMasks to allow reordering of independent subregister writes
203   /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks()
204   bool ShouldTrackLaneMasks = false;
205 
206   // Allow the scheduler to force top-down or bottom-up scheduling. If neither
207   // is true, the scheduler runs in both directions and converges.
208   bool OnlyTopDown = false;
209   bool OnlyBottomUp = false;
210 
211   // Disable heuristic that tries to fetch nodes from long dependency chains
212   // first.
213   bool DisableLatencyHeuristic = false;
214 
215   // Compute DFSResult for use in scheduling heuristics.
216   bool ComputeDFSResult = false;
217 
218   MachineSchedPolicy() = default;
219 };
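
// For instance, a subtarget can force one scheduling direction and enable
// pressure tracking only for small regions. This is an illustrative sketch
// (the subtarget name and the instruction-count threshold are hypothetical),
// following the overrideSchedPolicy() hook shown in the file header:
//
// void MySubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
//                                       unsigned NumRegionInstrs) const {
//   Policy.OnlyBottomUp = true;
//   if (NumRegionInstrs < 16)
//     Policy.ShouldTrackPressure = true;
// }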
220 
221 /// MachineSchedStrategy - Interface to the scheduling algorithm used by
222 /// ScheduleDAGMI.
223 ///
224 /// Initialization sequence:
225 ///   initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots
226 class LLVM_ABI MachineSchedStrategy {
227   virtual void anchor();
228 
229 public:
230   virtual ~MachineSchedStrategy() = default;
231 
232   /// Optionally override the per-region scheduling policy.
233   virtual void initPolicy(MachineBasicBlock::iterator Begin,
234                           MachineBasicBlock::iterator End,
235                           unsigned NumRegionInstrs) {}
236 
237   virtual MachineSchedPolicy getPolicy() const { return {}; }
238   virtual void dumpPolicy() const {}
239 
240   /// Check if pressure tracking is needed before building the DAG and
241   /// initializing this strategy. Called after initPolicy.
242   virtual bool shouldTrackPressure() const { return true; }
243 
244   /// Returns true if lanemasks should be tracked. LaneMask tracking is
245   /// necessary to reorder independent subregister defs for the same vreg.
246   /// This has to be enabled in combination with shouldTrackPressure().
247   virtual bool shouldTrackLaneMasks() const { return false; }
248 
249   // If this method returns true, handling of the scheduling regions
250   // themselves (in case of a scheduling boundary in MBB) will be done
251   // beginning with the topmost region of MBB.
252   virtual bool doMBBSchedRegionsTopDown() const { return false; }
253 
254   /// Initialize the strategy after building the DAG for a new region.
255   virtual void initialize(ScheduleDAGMI *DAG) = 0;
256 
257   /// Tell the strategy that MBB is about to be processed.
258   virtual void enterMBB(MachineBasicBlock *MBB) {};
259 
260   /// Tell the strategy that current MBB is done.
261   virtual void leaveMBB() {};
262 
263   /// Notify this strategy that all roots have been released (including those
264   /// that depend on EntrySU or ExitSU).
265   virtual void registerRoots() {}
266 
267   /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
268   /// schedule the node at the top of the unscheduled region. Otherwise it will
269   /// be scheduled at the bottom.
270   virtual SUnit *pickNode(bool &IsTopNode) = 0;
271 
272   /// Scheduler callback to notify that a new subtree is scheduled.
273   virtual void scheduleTree(unsigned SubtreeID) {}
274 
275   /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an
276   /// instruction and updated scheduled/remaining flags in the DAG nodes.
277   virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
278 
279   /// When all predecessor dependencies have been resolved, free this node for
280   /// top-down scheduling.
281   virtual void releaseTopNode(SUnit *SU) = 0;
282 
283   /// When all successor dependencies have been resolved, free this node for
284   /// bottom-up scheduling.
285   virtual void releaseBottomNode(SUnit *SU) = 0;
286 };
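
// A custom strategy only has to implement the pure virtual hooks above. The
// sketch below is illustrative (the class name is hypothetical): it keeps a
// single ready list and always schedules bottom-up in release order.
//
// struct TrivialSchedStrategy : public MachineSchedStrategy {
//   std::vector<SUnit *> ReadyQ;
//   void initialize(ScheduleDAGMI *DAG) override { ReadyQ.clear(); }
//   void releaseTopNode(SUnit *SU) override {}  // top boundary unused
//   void releaseBottomNode(SUnit *SU) override { ReadyQ.push_back(SU); }
//   SUnit *pickNode(bool &IsTopNode) override {
//     IsTopNode = false;                        // always schedule bottom-up
//     if (ReadyQ.empty())
//       return nullptr;                         // region is fully scheduled
//     SUnit *SU = ReadyQ.back();
//     ReadyQ.pop_back();
//     return SU;
//   }
//   void schedNode(SUnit *SU, bool IsTopNode) override {}
// };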
287 
288 /// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply
289 /// schedules machine instructions according to the given MachineSchedStrategy
290 /// without much extra book-keeping. This is the common functionality between
291 /// PreRA and PostRA MachineScheduler.
292 class LLVM_ABI ScheduleDAGMI : public ScheduleDAGInstrs {
293 protected:
294   AAResults *AA;
295   LiveIntervals *LIS;
296   std::unique_ptr<MachineSchedStrategy> SchedImpl;
297 
298   /// Ordered list of DAG postprocessing steps.
299   std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
300 
301   /// The top of the unscheduled zone.
302   MachineBasicBlock::iterator CurrentTop;
303 
304   /// The bottom of the unscheduled zone.
305   MachineBasicBlock::iterator CurrentBottom;
306 
307 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
308   /// The number of instructions scheduled so far. Used to cut off the
309   /// scheduler at the point determined by misched-cutoff.
310   unsigned NumInstrsScheduled = 0;
311 #endif
312 
313 public:
314   ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
315                 bool RemoveKillFlags)
316       : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA),
317         LIS(C->LIS), SchedImpl(std::move(S)) {}
318 
319   // Provide a vtable anchor
320   ~ScheduleDAGMI() override;
321 
322   /// If this method returns true, handling of the scheduling regions
323   /// themselves (in case of a scheduling boundary in MBB) will be done
324   /// beginning with the topmost region of MBB.
325   bool doMBBSchedRegionsTopDown() const override {
326     return SchedImpl->doMBBSchedRegionsTopDown();
327   }
328 
329   // Returns LiveIntervals instance for use in DAG mutators and such.
330   LiveIntervals *getLIS() const { return LIS; }
331 
332   /// Return true if this DAG supports VReg liveness and RegPressure.
333   virtual bool hasVRegLiveness() const { return false; }
334 
335   /// Add a postprocessing step to the DAG builder.
336   /// Mutations are applied in the order that they are added after normal DAG
337   /// building and before MachineSchedStrategy initialization.
338   ///
339   /// ScheduleDAGMI takes ownership of the Mutation object.
340   void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
341     if (Mutation)
342       Mutations.push_back(std::move(Mutation));
343   }
344 
345   MachineBasicBlock::iterator top() const { return CurrentTop; }
346   MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
347 
348   /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
349   /// region. This covers all instructions in a block, while schedule() may only
350   /// cover a subset.
351   void enterRegion(MachineBasicBlock *bb,
352                    MachineBasicBlock::iterator begin,
353                    MachineBasicBlock::iterator end,
354                    unsigned regioninstrs) override;
355 
356   /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
357   /// reorderable instructions.
358   void schedule() override;
359 
360   void startBlock(MachineBasicBlock *bb) override;
361   void finishBlock() override;
362 
363   /// Change the position of an instruction within the basic block and update
364   /// live ranges and region boundary iterators.
365   void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
366 
367   void viewGraph(const Twine &Name, const Twine &Title) override;
368   void viewGraph() override;
369 
370 protected:
371   // Top-Level entry points for the schedule() driver...
372 
373   /// Apply each ScheduleDAGMutation step in order. This allows different
374   /// instances of ScheduleDAGMI to perform custom DAG postprocessing.
375   void postProcessDAG();
376 
377   /// Release ExitSU predecessors and setup scheduler queues.
378   void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);
379 
380   /// Update scheduler DAG and queues after scheduling an instruction.
381   void updateQueues(SUnit *SU, bool IsTopNode);
382 
383   /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
384   void placeDebugValues();
385 
386   /// dump the scheduled Sequence.
387   void dumpSchedule() const;
388   /// Print execution trace of the schedule top-down or bottom-up.
389   void dumpScheduleTraceTopDown() const;
390   void dumpScheduleTraceBottomUp() const;
391 
392   // Lesser helpers...
393   bool checkSchedLimit();
394 
395   void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
396                              SmallVectorImpl<SUnit*> &BotRoots);
397 
398   void releaseSucc(SUnit *SU, SDep *SuccEdge);
399   void releaseSuccessors(SUnit *SU);
400   void releasePred(SUnit *SU, SDep *PredEdge);
401   void releasePredecessors(SUnit *SU);
402 };
403 
404 /// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules
405 /// machine instructions while updating LiveIntervals and tracking regpressure.
406 class LLVM_ABI ScheduleDAGMILive : public ScheduleDAGMI {
407 protected:
408   RegisterClassInfo *RegClassInfo;
409 
410   /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees
411   /// will be empty.
412   SchedDFSResult *DFSResult = nullptr;
413   BitVector ScheduledTrees;
414 
415   MachineBasicBlock::iterator LiveRegionEnd;
416 
417   /// Maps vregs to the SUnits of their uses in the current scheduling region.
418   VReg2SUnitMultiMap VRegUses;
419 
420   // Map each SU to its summary of pressure changes. This array is updated for
421   // liveness during bottom-up scheduling. Top-down scheduling may proceed but
422 // has no effect on the pressure diffs.
423   PressureDiffs SUPressureDiffs;
424 
425   /// Register pressure in this region computed by initRegPressure.
426   bool ShouldTrackPressure = false;
427   bool ShouldTrackLaneMasks = false;
428   IntervalPressure RegPressure;
429   RegPressureTracker RPTracker;
430 
431   /// List of pressure sets that exceed the target's pressure limit before
432   /// scheduling, listed in increasing set ID order. Each pressure set is paired
433   /// with its max pressure in the currently scheduled regions.
434   std::vector<PressureChange> RegionCriticalPSets;
435 
436   /// The top of the unscheduled zone.
437   IntervalPressure TopPressure;
438   RegPressureTracker TopRPTracker;
439 
440   /// The bottom of the unscheduled zone.
441   IntervalPressure BotPressure;
442   RegPressureTracker BotRPTracker;
443 
444 public:
445   ScheduleDAGMILive(MachineSchedContext *C,
446                     std::unique_ptr<MachineSchedStrategy> S)
447       : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false),
448         RegClassInfo(C->RegClassInfo), RPTracker(RegPressure),
449         TopRPTracker(TopPressure), BotRPTracker(BotPressure) {}
450 
451   ~ScheduleDAGMILive() override;
452 
453   /// Return true if this DAG supports VReg liveness and RegPressure.
454   bool hasVRegLiveness() const override { return true; }
455 
456   /// Return true if register pressure tracking is enabled.
457   bool isTrackingPressure() const { return ShouldTrackPressure; }
458 
459   /// Get current register pressure for the top scheduled instructions.
460   const IntervalPressure &getTopPressure() const { return TopPressure; }
461   const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
462 
463   /// Get current register pressure for the bottom scheduled instructions.
464   const IntervalPressure &getBotPressure() const { return BotPressure; }
465   const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
466 
467   /// Get register pressure for the entire scheduling region before scheduling.
468   const IntervalPressure &getRegPressure() const { return RegPressure; }
469 
470   const std::vector<PressureChange> &getRegionCriticalPSets() const {
471     return RegionCriticalPSets;
472   }
473 
474   PressureDiff &getPressureDiff(const SUnit *SU) {
475     return SUPressureDiffs[SU->NodeNum];
476   }
477   const PressureDiff &getPressureDiff(const SUnit *SU) const {
478     return SUPressureDiffs[SU->NodeNum];
479   }
480 
481   /// Compute a DFSResult after DAG building is complete, and before any
482   /// queue comparisons.
483   void computeDFSResult();
484 
485   /// Return a non-null DFS result if the scheduling strategy initialized it.
486   const SchedDFSResult *getDFSResult() const { return DFSResult; }
487 
488   BitVector &getScheduledTrees() { return ScheduledTrees; }
489 
490   /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
491   /// region. This covers all instructions in a block, while schedule() may only
492   /// cover a subset.
493   void enterRegion(MachineBasicBlock *bb,
494                    MachineBasicBlock::iterator begin,
495                    MachineBasicBlock::iterator end,
496                    unsigned regioninstrs) override;
497 
498   /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
499   /// reorderable instructions.
500   void schedule() override;
501 
502   /// Compute the cyclic critical path through the DAG.
503   unsigned computeCyclicCriticalPath();
504 
505   void dump() const override;
506 
507 protected:
508   // Top-Level entry points for the schedule() driver...
509 
510   /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
511   /// enabled. This sets up three trackers. RPTracker will cover the entire DAG
512   /// region, TopTracker and BottomTracker will be initialized to the top and
513 /// bottom of the DAG region without covering any unscheduled instruction.
514   void buildDAGWithRegPressure();
515 
516   /// Release ExitSU predecessors and setup scheduler queues. Re-position
517   /// the Top RP tracker in case the region beginning has changed.
518   void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);
519 
520   /// Move an instruction and update register pressure.
521   void scheduleMI(SUnit *SU, bool IsTopNode);
522 
523   // Lesser helpers...
524 
525   void initRegPressure();
526 
527   void updatePressureDiffs(ArrayRef<VRegMaskOrUnit> LiveUses);
528 
529   void updateScheduledPressure(const SUnit *SU,
530                                const std::vector<unsigned> &NewMaxPressure);
531 
532   void collectVRegUses(SUnit &SU);
533 };
534 
535 //===----------------------------------------------------------------------===//
536 ///
537 /// Helpers for implementing custom MachineSchedStrategy classes. These take
538 /// care of the book-keeping associated with list scheduling heuristics.
539 ///
540 //===----------------------------------------------------------------------===//
541 
542 /// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience
543 /// methods for pushing and removing nodes. ReadyQueue's are uniquely identified
544 /// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
545 ///
546 /// This is a convenience class that may be used by implementations of
547 /// MachineSchedStrategy.
548 class ReadyQueue {
549   unsigned ID;
550   std::string Name;
551   std::vector<SUnit*> Queue;
552 
553 public:
554   ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
555 
556   unsigned getID() const { return ID; }
557 
558   StringRef getName() const { return Name; }
559 
560   // SU is in this queue if its NodeQueueId is a superset of this ID.
561   bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
562 
563   bool empty() const { return Queue.empty(); }
564 
565   void clear() { Queue.clear(); }
566 
567   unsigned size() const { return Queue.size(); }
568 
569   using iterator = std::vector<SUnit*>::iterator;
570 
571   iterator begin() { return Queue.begin(); }
572 
573   iterator end() { return Queue.end(); }
574 
575   ArrayRef<SUnit*> elements() { return Queue; }
576 
577   iterator find(SUnit *SU) { return llvm::find(Queue, SU); }
578 
579   void push(SUnit *SU) {
580     Queue.push_back(SU);
581     SU->NodeQueueId |= ID;
582   }
583 
584   iterator remove(iterator I) {
585     (*I)->NodeQueueId &= ~ID;
586     *I = Queue.back();
587     unsigned idx = I - Queue.begin();
588     Queue.pop_back();
589     return Queue.begin() + idx;
590   }
591 
592   LLVM_ABI void dump() const;
593 };
594 
595 /// Summarize the unscheduled region.
596 struct SchedRemainder {
597   // Critical path through the DAG in expected latency.
598   unsigned CriticalPath;
599   unsigned CyclicCritPath;
600 
601   // Scaled count of micro-ops left to schedule.
602   unsigned RemIssueCount;
603 
604   bool IsAcyclicLatencyLimited;
605 
606   // Unscheduled resources
607   SmallVector<unsigned, 16> RemainingCounts;
608 
609   SchedRemainder() { reset(); }
610 
611   void reset() {
612     CriticalPath = 0;
613     CyclicCritPath = 0;
614     RemIssueCount = 0;
615     IsAcyclicLatencyLimited = false;
616     RemainingCounts.clear();
617   }
618 
619   LLVM_ABI void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
620 };
621 
622 /// ResourceSegments are a collection of intervals closed on the
623 /// left and open on the right:
624 ///
625 ///     list{ [a1, b1), [a2, b2), ..., [a_N, b_N) }
626 ///
627 /// The collection has the following properties:
628 ///
629 /// 1. The list is ordered: a_i < b_i and b_i < a_(i+1)
630 ///
631 /// 2. The intervals in the collection do not intersect each other.
632 ///
633 /// A \ref ResourceSegments instance represents the cycle
634 /// reservation history of an individual resource instance.
635 class ResourceSegments {
636 public:
637   /// Represents an interval of discrete integer values closed on
638   /// the left and open on the right: [a, b).
639   typedef std::pair<int64_t, int64_t> IntervalTy;
640 
641   /// Adds an interval [a, b) to the collection of the instance.
642   ///
643   /// When adding [a, b) to the collection, the operation merges the
644   /// adjacent intervals. For example
645   ///
646   ///       0  1  2  3  4  5  6  7  8  9  10
647   ///       [-----)  [--)     [--)
648   ///     +       [--)
649   ///     = [-----------)     [--)
650   ///
651   /// To be able to debug duplicate resource usage, the function has
652   /// an assertion that checks that no interval is added if it
653   /// overlaps any of the intervals in the collection. We can
654   /// require this because by definition a \ref ResourceSegments is
655   /// attached only to an individual resource instance.
656   LLVM_ABI void add(IntervalTy A, const unsigned CutOff = 10);
657 
658 public:
659   /// Checks whether intervals intersect.
660   LLVM_ABI static bool intersects(IntervalTy A, IntervalTy B);
661 
662   /// These functions return the interval used by a resource in bottom-up and
663   /// top-down scheduling.
664   ///
665   /// Consider an instruction that uses resources X0, X1 and X2 as follows:
666   ///
667   /// X0 X1 X1 X2    +--------+-------------+--------------+
668   ///                |Resource|AcquireAtCycle|ReleaseAtCycle|
669   ///                +--------+-------------+--------------+
670   ///                |   X0   |     0       |       1      |
671   ///                +--------+-------------+--------------+
672   ///                |   X1   |     1       |       3      |
673   ///                +--------+-------------+--------------+
674   ///                |   X2   |     3       |       4      |
675   ///                +--------+-------------+--------------+
676   ///
677   /// If we can schedule the instruction at cycle C, we need to
678   /// compute the interval of the resource as follows:
679   ///
680   /// # TOP DOWN SCHEDULING
681   ///
682   /// Cycle scheduling flows to the _right_, in the same direction
683   /// as time.
684   ///
685   ///       C      1      2      3      4      5  ...
686   /// ------|------|------|------|------|------|----->
687   ///       X0     X1     X1     X2   ---> direction of time
688   /// X0    [C, C+1)
689   /// X1           [C+1,      C+3)
690   /// X2                         [C+3, C+4)
691   ///
692   /// Therefore, the formula to compute the interval for a resource
693   /// of an instruction that can be scheduled at cycle C in top-down
694   /// scheduling is:
695   ///
696   ///       [C+AcquireAtCycle, C+ReleaseAtCycle)
697   ///
698   ///
699   /// # BOTTOM UP SCHEDULING
700   ///
701   /// Cycle scheduling flows to the _left_, in the opposite
702   /// direction of time.
703   ///
704   /// In bottom up scheduling, the scheduling happens in opposite
705   /// direction to the execution of the cycles of the
706   /// instruction. When the instruction is scheduled at cycle `C`,
707   /// the resources are allocated in the past relative to `C`:
708   ///
709   ///       2      1      C     -1     -2     -3     -4     -5  ...
710   /// <-----|------|------|------|------|------|------|------|---
711   ///                     X0     X1     X1     X2   ---> direction of time
712   /// X0           (C+1, C]
713   /// X1                  (C,        C-2]
714   /// X2                              (C-2, C-3]
715   ///
716   /// Therefore, the formula to compute the interval for a resource
717   /// of an instruction that can be scheduled at cycle C in bottom-up
718   /// scheduling is:
719   ///
720   ///       [C-ReleaseAtCycle+1, C-AcquireAtCycle+1)
721   ///
722   ///
723   /// NOTE: In both cases, the number of cycles booked by a
724   /// resource is the value (ReleaseAtCycle - AcquireAtCycle).
725   static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle,
726                                               unsigned ReleaseAtCycle) {
727     return std::make_pair<long, long>((long)C - (long)ReleaseAtCycle + 1L,
728                                       (long)C - (long)AcquireAtCycle + 1L);
729   }
getResourceIntervalTop(unsigned C,unsigned AcquireAtCycle,unsigned ReleaseAtCycle)730   static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle,
731                                            unsigned ReleaseAtCycle) {
732     return std::make_pair<long, long>((long)C + (long)AcquireAtCycle,
733                                       (long)C + (long)ReleaseAtCycle);
734   }
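
  // Worked example of the formulas above (values are illustrative): for the
  // X1 row of the table (AcquireAtCycle=1, ReleaseAtCycle=3) and a candidate
  // cycle C=10, both helpers book ReleaseAtCycle - AcquireAtCycle = 2 cycles:
  //
  //   getResourceIntervalTop(10, 1, 3)    == {11, 13}   // i.e. [C+1, C+3)
  //   getResourceIntervalBottom(10, 1, 3) == {8, 10}    // i.e. [C-3+1, C-1+1)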
735 
736 private:
737   /// Finds the first cycle in which a resource can be allocated.
738   ///
739   /// The function uses the \param IntervalBuilder [*] to build a
740   /// resource interval [a, b) out of the input parameters \param
741   /// CurrCycle, \param AcquireAtCycle and \param ReleaseAtCycle.
742   ///
743   /// The function then loops through the intervals in the ResourceSegments
744   /// and shifts the interval [a, b) and the ReturnCycle to the
745   /// right until there is no intersection between the intervals of
746   /// the \ref ResourceSegments instance and the new shifted [a, b). When
747   /// this condition is met, the ReturnCycle (which
748   /// corresponds to the cycle in which the resource can be
749   /// allocated) is returned.
750   ///
751   ///               c = CurrCycle in input
752   ///               c   1   2   3   4   5   6   7   8   9   10 ... ---> (time
753   ///               flow)
754   ///  ResourceSegments...  [---)   [-------)           [-----------)
755   ///               c   [1     3[  -> AcquireAtCycle=1, ReleaseAtCycle=3
756   ///                 ++c   [1     3)
757   ///                     ++c   [1     3)
758   ///                         ++c   [1     3)
759   ///                             ++c   [1     3)
760   ///                                 ++c   [1     3)    ---> returns c
761   ///                                 incremented by 5 (c+5)
762   ///
763   ///
764   /// Notice that for bottom-up scheduling the diagram is slightly
765   /// different because the current cycle c is always on the right
766   /// of the interval [a, b) (see \ref
767   /// `getResourceIntervalBottom`). This is because the cycle
768   /// increments for bottom-up scheduling move in the direction
769   /// opposite to the direction of time:
770   ///
771   ///     --------> direction of time.
772   ///     XXYZZZ    (resource usage)
773   ///     --------> direction of top-down execution cycles.
774   ///     <-------- direction of bottom-up execution cycles.
775   ///
776   /// Even though bottom-up scheduling moves against the flow of
777   /// time, the algorithm used to find the first free slot in between
778   /// intervals is the same as for top-down scheduling.
779   ///
780   /// [*] See \ref `getResourceIntervalTop` and
781   /// \ref `getResourceIntervalBottom` to see how such resource intervals
782   /// are built.
783   LLVM_ABI unsigned getFirstAvailableAt(
784       unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
785       std::function<IntervalTy(unsigned, unsigned, unsigned)> IntervalBuilder)
786       const;
787 
788 public:
789   /// getFirstAvailableAtFromBottom and getFirstAvailableAtFromTop
790   /// should be merged into a single function to which a function that
791   /// creates the `NewInterval` is passed as a parameter.
792   unsigned getFirstAvailableAtFromBottom(unsigned CurrCycle,
793                                          unsigned AcquireAtCycle,
794                                          unsigned ReleaseAtCycle) const {
795     return getFirstAvailableAt(CurrCycle, AcquireAtCycle, ReleaseAtCycle,
796                                getResourceIntervalBottom);
797   }
798   unsigned getFirstAvailableAtFromTop(unsigned CurrCycle,
799                                       unsigned AcquireAtCycle,
800                                       unsigned ReleaseAtCycle) const {
801     return getFirstAvailableAt(CurrCycle, AcquireAtCycle, ReleaseAtCycle,
802                                getResourceIntervalTop);
803   }
804 
805 private:
806   std::list<IntervalTy> _Intervals;
807   /// Merge all adjacent intervals in the collection. For all pairs
808   /// of adjacent intervals, it performs [a, b) + [b, c) -> [a, c).
809   ///
810   /// Before performing the merge operation, the intervals are
811   /// sorted with \ref sort_predicate.
812   LLVM_ABI void sortAndMerge();
813 
814 public:
815   // constructor for empty set
816   explicit ResourceSegments(){};
817   bool empty() const { return _Intervals.empty(); }
818   explicit ResourceSegments(const std::list<IntervalTy> &Intervals)
819       : _Intervals(Intervals) {
820     sortAndMerge();
821   }
822 
823   friend bool operator==(const ResourceSegments &c1,
824                          const ResourceSegments &c2) {
825     return c1._Intervals == c2._Intervals;
826   }
827   friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
828                                        const ResourceSegments &Segments) {
829     os << "{ ";
830     for (auto p : Segments._Intervals)
831       os << "[" << p.first << ", " << p.second << "), ";
832     os << "}\n";
833     return os;
834   }
835 };
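
// Usage sketch (cycle values are illustrative): adjacent reservations of the
// same resource instance are merged by add(), while overlapping ones trip its
// assertion.
//
//   ResourceSegments S;
//   S.add({2, 5});    // reserve cycles [2, 5)
//   S.add({5, 7});    // adjacent: collection becomes { [2, 7) }
//   S.add({9, 10});   // disjoint: collection becomes { [2, 7), [9, 10) }
//   // S.add({3, 4}); // would assert: overlaps [2, 7)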
836 
837 /// Each Scheduling boundary is associated with ready queues. It tracks the
838 /// current cycle in the direction of movement, and maintains the state
839 /// of "hazards" and other interlocks at the current cycle.
840 class SchedBoundary {
841 public:
842   /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
843   enum {
844     TopQID = 1,
845     BotQID = 2,
846     LogMaxQID = 2
847   };
848 
849   ScheduleDAGMI *DAG = nullptr;
850   const TargetSchedModel *SchedModel = nullptr;
851   SchedRemainder *Rem = nullptr;
852 
853   ReadyQueue Available;
854   ReadyQueue Pending;
855 
856   ScheduleHazardRecognizer *HazardRec = nullptr;
857 
858 private:
859   /// True if the pending Q should be checked/updated before scheduling another
860   /// instruction.
861   bool CheckPending;
862 
863   /// Number of cycles it takes to issue the instructions scheduled in this
864   /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
865   /// See getStalls().
866   unsigned CurrCycle;
867 
868   /// Micro-ops issued in the current cycle
869   unsigned CurrMOps;
870 
871   /// MinReadyCycle - Cycle of the soonest available instruction.
872   unsigned MinReadyCycle;
873 
874   // The expected latency of the critical path in this scheduled zone.
875   unsigned ExpectedLatency;
876 
877   // The latency of dependence chains leading into this zone.
878   // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
879   // For each cycle scheduled: DLat -= 1.
880   unsigned DependentLatency;
881 
882   /// Count the scheduled (issued) micro-ops that can be retired by
883   /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
884   unsigned RetiredMOps;
885 
886   // Count scheduled resources that have been executed. Resources are
887   // considered executed if they become ready in the time that it takes to
888   // saturate any resource including the one in question. Counts are scaled
889   // for direct comparison with other resources. Counts can be compared with
890   // MOps * getMicroOpFactor and Latency * getLatencyFactor.
891   SmallVector<unsigned, 16> ExecutedResCounts;
892 
893   /// Cache the max count for a single resource.
894   unsigned MaxExecutedResCount;
895 
896   // Cache the critical resources ID in this scheduled zone.
897   unsigned ZoneCritResIdx;
898 
899   // Is the scheduled region resource limited vs. latency limited.
900   bool IsResourceLimited;
901 
902 public:
903 private:
904   /// Record how resources have been allocated across the cycles of
905   /// the execution.
906   std::map<unsigned, ResourceSegments> ReservedResourceSegments;
907   std::vector<unsigned> ReservedCycles;
908   /// For each PIdx, stores first index into ReservedResourceSegments that
909   /// corresponds to it.
910   ///
911   /// For example, consider the following 3 resources (ResourceCount =
912   /// 3):
913   ///
914   ///   +------------+--------+
915   ///   |ResourceName|NumUnits|
916   ///   +------------+--------+
917   ///   |     X      |    2   |
918   ///   +------------+--------+
919   ///   |     Y      |    3   |
920   ///   +------------+--------+
921   ///   |     Z      |    1   |
922   ///   +------------+--------+
923   ///
924   /// In this case, the total number of resource instances is 6. The
925   /// vector \ref ReservedResourceSegments will have a slot for each instance.
926   /// The vector \ref ReservedCyclesIndex will track at what index the first
927   /// instance of the resource is found in the vector of \ref
928   /// ReservedResourceSegments:
929   ///
930   ///                              Indexes of instances in
931   ///                              ReservedResourceSegments
932   ///
933   ///                              0   1   2   3   4  5
934   /// ReservedCyclesIndex[0] = 0; [X0, X1,
935   /// ReservedCyclesIndex[1] = 2;          Y0, Y1, Y2
936   /// ReservedCyclesIndex[2] = 5;                     Z
937   SmallVector<unsigned, 16> ReservedCyclesIndex;
938 
939   // For each PIdx, stores the resource group IDs of its subunits
940   SmallVector<APInt, 16> ResourceGroupSubUnitMasks;
941 
942 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
943   // Remember the greatest possible stall as an upper bound on the number of
944   // times we should retry the pending queue because of a hazard.
945   unsigned MaxObservedStall;
946 #endif
947 
948 public:
949   /// Pending queues extend the ready queues with the same ID and the
950   /// PendingFlag set.
951   SchedBoundary(unsigned ID, const Twine &Name):
952     Available(ID, Name+".A"), Pending(ID << LogMaxQID, Name+".P") {
953     reset();
954   }
955   SchedBoundary &operator=(const SchedBoundary &other) = delete;
956   SchedBoundary(const SchedBoundary &other) = delete;
957   LLVM_ABI ~SchedBoundary();
958 
959   LLVM_ABI void reset();
960 
961   LLVM_ABI void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
962                      SchedRemainder *rem);
963 
964   bool isTop() const {
965     return Available.getID() == TopQID;
966   }
967 
968   /// Number of cycles to issue the instructions scheduled in this zone.
getCurrCycle()969   unsigned getCurrCycle() const { return CurrCycle; }
970 
971   /// Micro-ops issued in the current cycle
972   unsigned getCurrMOps() const { return CurrMOps; }
973 
974   // The latency of dependence chains leading into this zone.
975   unsigned getDependentLatency() const { return DependentLatency; }
976 
977   /// Get the number of latency cycles "covered" by the scheduled
978   /// instructions. This is the larger of the critical path within the zone
979   /// and the number of cycles required to issue the instructions.
980   unsigned getScheduledLatency() const {
981     return std::max(ExpectedLatency, CurrCycle);
982   }
983 
984   unsigned getUnscheduledLatency(SUnit *SU) const {
985     return isTop() ? SU->getHeight() : SU->getDepth();
986   }
987 
988   unsigned getResourceCount(unsigned ResIdx) const {
989     return ExecutedResCounts[ResIdx];
990   }
991 
992   /// Get the scaled count of scheduled micro-ops and resources, including
993   /// executed resources.
994   unsigned getCriticalCount() const {
995     if (!ZoneCritResIdx)
996       return RetiredMOps * SchedModel->getMicroOpFactor();
997     return getResourceCount(ZoneCritResIdx);
998   }
999 
1000   /// Get a scaled count for the minimum execution time of the scheduled
1001   /// micro-ops that are ready to execute by getExecutedCount. Notice the
1002   /// feedback loop.
1003   unsigned getExecutedCount() const {
1004     return std::max(CurrCycle * SchedModel->getLatencyFactor(),
1005                     MaxExecutedResCount);
1006   }
1007 
1008   unsigned getZoneCritResIdx() const { return ZoneCritResIdx; }
1009 
1010   // Is the scheduled region resource limited vs. latency limited.
1011   bool isResourceLimited() const { return IsResourceLimited; }
1012 
1013   /// Get the difference between the given SUnit's ready time and the current
1014   /// cycle.
1015   LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU);
1016 
1017   LLVM_ABI unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
1018                                                    unsigned ReleaseAtCycle,
1019                                                    unsigned AcquireAtCycle);
1020 
1021   LLVM_ABI std::pair<unsigned, unsigned>
1022   getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
1023                        unsigned ReleaseAtCycle, unsigned AcquireAtCycle);
1024 
1025   bool isUnbufferedGroup(unsigned PIdx) const {
1026     return SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin &&
1027            !SchedModel->getProcResource(PIdx)->BufferSize;
1028   }
1029 
1030   LLVM_ABI bool checkHazard(SUnit *SU);
1031 
1032   LLVM_ABI unsigned findMaxLatency(ArrayRef<SUnit *> ReadySUs);
1033 
1034   LLVM_ABI unsigned getOtherResourceCount(unsigned &OtherCritIdx);
1035 
1036   /// Release SU to make it ready. If it is not blocked by a hazard, remove it
1037   /// from the pending queue (if it is already there) and push it into the
1038   /// available queue. Otherwise, push the SU into the pending queue.
1039   ///
1040   /// @param SU The unit to be released.
1041   /// @param ReadyCycle Until which cycle the unit is ready.
1042   /// @param InPQueue Whether SU is already in pending queue.
1043   /// @param Idx Position offset in pending queue (if in it).
1044   LLVM_ABI void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
1045                             unsigned Idx = 0);
1046 
1047   LLVM_ABI void bumpCycle(unsigned NextCycle);
1048 
1049   LLVM_ABI void incExecutedResources(unsigned PIdx, unsigned Count);
1050 
1051   LLVM_ABI unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx,
1052                                   unsigned Cycles, unsigned ReadyCycle,
1053                                   unsigned StartAtCycle);
1054 
1055   LLVM_ABI void bumpNode(SUnit *SU);
1056 
1057   LLVM_ABI void releasePending();
1058 
1059   LLVM_ABI void removeReady(SUnit *SU);
1060 
1061   /// Call this before applying any other heuristics to the Available queue.
1062   /// Updates the Available/Pending Q's if necessary and returns the single
1063   /// available instruction, or NULL if there are multiple candidates.
1064   LLVM_ABI SUnit *pickOnlyChoice();
1065 
1066   /// Dump the state of the information that tracks resource usage.
1067   LLVM_ABI void dumpReservedCycles() const;
1068   LLVM_ABI void dumpScheduledState() const;
1069 };
1070 
1071 /// Base class for GenericScheduler. This class maintains information about
1072 /// scheduling candidates based on TargetSchedModel, making it easy to implement
1073 /// heuristics for either preRA or postRA scheduling.
1074 class GenericSchedulerBase : public MachineSchedStrategy {
1075 public:
1076   /// Represent the type of SchedCandidate found within a single queue.
1077   /// pickNodeBidirectional depends on these listed by decreasing priority.
1078   enum CandReason : uint8_t {
1079     NoCand,
1080     Only1,
1081     PhysReg,
1082     RegExcess,
1083     RegCritical,
1084     Stall,
1085     Cluster,
1086     Weak,
1087     RegMax,
1088     ResourceReduce,
1089     ResourceDemand,
1090     BotHeightReduce,
1091     BotPathReduce,
1092     TopDepthReduce,
1093     TopPathReduce,
1094     NodeOrder,
1095     FirstValid
1096   };
1097 
1098 #ifndef NDEBUG
1099   static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
1100 #endif
1101 
1102   /// Policy for scheduling the next instruction in the candidate's zone.
1103   struct CandPolicy {
1104     bool ReduceLatency = false;
1105     unsigned ReduceResIdx = 0;
1106     unsigned DemandResIdx = 0;
1107 
1108     CandPolicy() = default;
1109 
1110     bool operator==(const CandPolicy &RHS) const {
1111       return ReduceLatency == RHS.ReduceLatency &&
1112              ReduceResIdx == RHS.ReduceResIdx &&
1113              DemandResIdx == RHS.DemandResIdx;
1114     }
1115     bool operator!=(const CandPolicy &RHS) const {
1116       return !(*this == RHS);
1117     }
1118   };
1119 
1120   /// Status of an instruction's critical resource consumption.
1121   struct SchedResourceDelta {
1122     // Count critical resources in the scheduled region required by SU.
1123     unsigned CritResources = 0;
1124 
1125     // Count critical resources from another region consumed by SU.
1126     unsigned DemandedResources = 0;
1127 
1128     SchedResourceDelta() = default;
1129 
1130     bool operator==(const SchedResourceDelta &RHS) const {
1131       return CritResources == RHS.CritResources
1132         && DemandedResources == RHS.DemandedResources;
1133     }
1134     bool operator!=(const SchedResourceDelta &RHS) const {
1135       return !operator==(RHS);
1136     }
1137   };
1138 
1139   /// Store the state used by GenericScheduler heuristics, required for the
1140   /// lifetime of one invocation of pickNode().
1141   struct SchedCandidate {
1142     CandPolicy Policy;
1143 
1144     // The best SUnit candidate.
1145     SUnit *SU;
1146 
1147     // The reason for this candidate.
1148     CandReason Reason;
1149 
1150     // Whether this candidate should be scheduled at top/bottom.
1151     bool AtTop;
1152 
1153     // Register pressure values for the best candidate.
1154     RegPressureDelta RPDelta;
1155 
1156     // Critical resource consumption of the best candidate.
1157     SchedResourceDelta ResDelta;
1158 
1159     SchedCandidate() { reset(CandPolicy()); }
1160     SchedCandidate(const CandPolicy &Policy) { reset(Policy); }
1161 
1162     void reset(const CandPolicy &NewPolicy) {
1163       Policy = NewPolicy;
1164       SU = nullptr;
1165       Reason = NoCand;
1166       AtTop = false;
1167       RPDelta = RegPressureDelta();
1168       ResDelta = SchedResourceDelta();
1169     }
1170 
1171     bool isValid() const { return SU; }
1172 
1173     // Copy the status of another candidate without changing policy.
1174     void setBest(SchedCandidate &Best) {
1175       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
1176       SU = Best.SU;
1177       Reason = Best.Reason;
1178       AtTop = Best.AtTop;
1179       RPDelta = Best.RPDelta;
1180       ResDelta = Best.ResDelta;
1181     }
1182 
1183     LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG,
1184                                     const TargetSchedModel *SchedModel);
1185   };
1186 
1187 protected:
1188   const MachineSchedContext *Context;
1189   const TargetSchedModel *SchedModel = nullptr;
1190   const TargetRegisterInfo *TRI = nullptr;
1191   unsigned TopIdx = 0;
1192   unsigned BotIdx = 0;
1193   unsigned NumRegionInstrs = 0;
1194 
1195   MachineSchedPolicy RegionPolicy;
1196 
1197   SchedRemainder Rem;
1198 
1199   GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {}
1200 
1201   LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA,
1202                           SchedBoundary &CurrZone, SchedBoundary *OtherZone);
1203 
1204   MachineSchedPolicy getPolicy() const override { return RegionPolicy; }
1205 
1206 #ifndef NDEBUG
1207   void traceCandidate(const SchedCandidate &Cand);
1208 #endif
1209 
1210 private:
1211   bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone,
1212                            bool ComputeRemLatency, unsigned &RemLatency) const;
1213 };
1214 
1215 // Utility functions used by heuristics in tryCandidate().
1216 LLVM_ABI bool tryLess(int TryVal, int CandVal,
1217                       GenericSchedulerBase::SchedCandidate &TryCand,
1218                       GenericSchedulerBase::SchedCandidate &Cand,
1219                       GenericSchedulerBase::CandReason Reason);
1220 LLVM_ABI bool tryGreater(int TryVal, int CandVal,
1221                          GenericSchedulerBase::SchedCandidate &TryCand,
1222                          GenericSchedulerBase::SchedCandidate &Cand,
1223                          GenericSchedulerBase::CandReason Reason);
1224 LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
1225                          GenericSchedulerBase::SchedCandidate &Cand,
1226                          SchedBoundary &Zone);
1227 LLVM_ABI bool tryPressure(const PressureChange &TryP,
1228                           const PressureChange &CandP,
1229                           GenericSchedulerBase::SchedCandidate &TryCand,
1230                           GenericSchedulerBase::SchedCandidate &Cand,
1231                           GenericSchedulerBase::CandReason Reason,
1232                           const TargetRegisterInfo *TRI,
1233                           const MachineFunction &MF);
1234 LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop);
1235 LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop);
1236 
1237 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance
1238 /// the schedule.
1239 class LLVM_ABI GenericScheduler : public GenericSchedulerBase {
1240 public:
1241   GenericScheduler(const MachineSchedContext *C):
1242     GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
1243     Bot(SchedBoundary::BotQID, "BotQ") {}
1244 
1245   void initPolicy(MachineBasicBlock::iterator Begin,
1246                   MachineBasicBlock::iterator End,
1247                   unsigned NumRegionInstrs) override;
1248 
1249   void dumpPolicy() const override;
1250 
1251   bool shouldTrackPressure() const override {
1252     return RegionPolicy.ShouldTrackPressure;
1253   }
1254 
1255   bool shouldTrackLaneMasks() const override {
1256     return RegionPolicy.ShouldTrackLaneMasks;
1257   }
1258 
1259   void initialize(ScheduleDAGMI *dag) override;
1260 
1261   SUnit *pickNode(bool &IsTopNode) override;
1262 
1263   void schedNode(SUnit *SU, bool IsTopNode) override;
1264 
1265   void releaseTopNode(SUnit *SU) override {
1266     if (SU->isScheduled)
1267       return;
1268 
1269     Top.releaseNode(SU, SU->TopReadyCycle, false);
1270     TopCand.SU = nullptr;
1271   }
1272 
1273   void releaseBottomNode(SUnit *SU) override {
1274     if (SU->isScheduled)
1275       return;
1276 
1277     Bot.releaseNode(SU, SU->BotReadyCycle, false);
1278     BotCand.SU = nullptr;
1279   }
1280 
1281   void registerRoots() override;
1282 
1283 protected:
1284   ScheduleDAGMILive *DAG = nullptr;
1285 
1286   // State of the top and bottom scheduled instruction boundaries.
1287   SchedBoundary Top;
1288   SchedBoundary Bot;
1289 
1290   ClusterInfo *TopCluster;
1291   ClusterInfo *BotCluster;
1292 
1293   /// Candidate last picked from Top boundary.
1294   SchedCandidate TopCand;
1295   /// Candidate last picked from Bot boundary.
1296   SchedCandidate BotCand;
1297 
1298   void checkAcyclicLatency();
1299 
1300   void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
1301                      const RegPressureTracker &RPTracker,
1302                      RegPressureTracker &TempTracker);
1303 
1304   virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
1305                             SchedBoundary *Zone) const;
1306 
1307   SUnit *pickNodeBidirectional(bool &IsTopNode);
1308 
1309   void pickNodeFromQueue(SchedBoundary &Zone,
1310                          const CandPolicy &ZonePolicy,
1311                          const RegPressureTracker &RPTracker,
1312                          SchedCandidate &Candidate);
1313 
1314   void reschedulePhysReg(SUnit *SU, bool isTop);
1315 };
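
// Targets that only need to adjust the generic heuristics can subclass
// GenericScheduler and override tryCandidate(), reusing the tryLess() /
// tryGreater() helpers declared above. An illustrative sketch (the class name
// and the particular ordering of heuristics are hypothetical):
//
// class MyTargetScheduler : public GenericScheduler {
//   using GenericScheduler::GenericScheduler;
//   bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
//                     SchedBoundary *Zone) const override {
//     if (!Cand.isValid()) {                 // first candidate always wins
//       TryCand.Reason = NodeOrder;
//       return true;
//     }
//     // Prefer copies of physical registers next to their defs/uses.
//     if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
//                    biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
//       return TryCand.Reason != NoCand;
//     // Otherwise fall back to the original instruction order.
//     if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
//       TryCand.Reason = NodeOrder;
//       return true;
//     }
//     return false;
//   }
// };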
1316 
1317 /// PostGenericScheduler - Interface to the scheduling algorithm used by
1318 /// ScheduleDAGMI.
1319 ///
1320 /// Callbacks from ScheduleDAGMI:
1321 ///   initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
1322 class LLVM_ABI PostGenericScheduler : public GenericSchedulerBase {
1323 protected:
1324   ScheduleDAGMI *DAG = nullptr;
1325   SchedBoundary Top;
1326   SchedBoundary Bot;
1327 
1328   /// Candidate last picked from Top boundary.
1329   SchedCandidate TopCand;
1330   /// Candidate last picked from Bot boundary.
1331   SchedCandidate BotCand;
1332 
1333   ClusterInfo *TopCluster;
1334   ClusterInfo *BotCluster;
1335 
1336 public:
1337   PostGenericScheduler(const MachineSchedContext *C)
1338       : GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
1339         Bot(SchedBoundary::BotQID, "BotQ") {}
1340 
1341   ~PostGenericScheduler() override = default;
1342 
1343   void initPolicy(MachineBasicBlock::iterator Begin,
1344                   MachineBasicBlock::iterator End,
1345                   unsigned NumRegionInstrs) override;
1346 
1347   /// PostRA scheduling does not track pressure.
1348   bool shouldTrackPressure() const override { return false; }
1349 
1350   void initialize(ScheduleDAGMI *Dag) override;
1351 
1352   void registerRoots() override;
1353 
1354   SUnit *pickNode(bool &IsTopNode) override;
1355 
1356   SUnit *pickNodeBidirectional(bool &IsTopNode);
1357 
1358   void scheduleTree(unsigned SubtreeID) override {
1359     llvm_unreachable("PostRA scheduler does not support subtree analysis.");
1360   }
1361 
1362   void schedNode(SUnit *SU, bool IsTopNode) override;
1363 
1364   void releaseTopNode(SUnit *SU) override {
1365     if (SU->isScheduled)
1366       return;
1367     Top.releaseNode(SU, SU->TopReadyCycle, false);
1368     TopCand.SU = nullptr;
1369   }
1370 
1371   void releaseBottomNode(SUnit *SU) override {
1372     if (SU->isScheduled)
1373       return;
1374     Bot.releaseNode(SU, SU->BotReadyCycle, false);
1375     BotCand.SU = nullptr;
1376   }
1377 
1378 protected:
1379   virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
1380 
1381   void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand);
1382 };
1383 
1384 /// If ReorderWhileClustering is set to true, no attempt will be made to
1385 /// reduce reordering due to load clustering.
1386 LLVM_ABI std::unique_ptr<ScheduleDAGMutation>
1387 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1388                              const TargetRegisterInfo *TRI,
1389                              bool ReorderWhileClustering = false);
1390 
1391 /// If ReorderWhileClustering is set to true, no attempt will be made to
1392 /// reduce reordering due to store clustering.
1393 LLVM_ABI std::unique_ptr<ScheduleDAGMutation>
1394 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1395                               const TargetRegisterInfo *TRI,
1396                               bool ReorderWhileClustering = false);
1397 
1398 LLVM_ABI std::unique_ptr<ScheduleDAGMutation>
1399 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1400                                const TargetRegisterInfo *TRI);
1401 
1402 /// Create the standard converging machine scheduler. This will be used as the
1403 /// default scheduler if the target does not set a default.
1404 /// Adds default DAG mutations.
1405 template <typename Strategy = GenericScheduler>
1406 ScheduleDAGMILive *createSchedLive(MachineSchedContext *C) {
1407   ScheduleDAGMILive *DAG =
1408       new ScheduleDAGMILive(C, std::make_unique<Strategy>(C));
1409   // Register DAG post-processors.
1410   //
1411   // FIXME: extend the mutation API to allow earlier mutations to instantiate
1412   // data and pass it to later mutations. Have a single mutation that gathers
1413   // the interesting nodes in one pass.
1414   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
1415 
1416   const TargetSubtargetInfo &STI = C->MF->getSubtarget();
1417   // Add MacroFusion mutation if fusions are not empty.
1418   const auto &MacroFusions = STI.getMacroFusions();
1419   if (!MacroFusions.empty())
1420     DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
1421   return DAG;
1422 }
1423 
1424 /// Create a generic scheduler with no vreg liveness or DAG mutation passes.
1425 template <typename Strategy = PostGenericScheduler>
1426 ScheduleDAGMI *createSchedPostRA(MachineSchedContext *C) {
1427   ScheduleDAGMI *DAG = new ScheduleDAGMI(C, std::make_unique<Strategy>(C),
1428                                          /*RemoveKillFlags=*/true);
1429   const TargetSubtargetInfo &STI = C->MF->getSubtarget();
1430   // Add MacroFusion mutation if fusions are not empty.
1431   const auto &MacroFusions = STI.getMacroFusions();
1432   if (!MacroFusions.empty())
1433     DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
1434   return DAG;
1435 }
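
// A target would typically return these factories from its scheduler hooks,
// optionally with a custom strategy, mirroring the examples in the file
// header (CustomStrategy is hypothetical):
//
// ScheduleDAGInstrs *<Target>TargetMachine::
// createMachineScheduler(MachineSchedContext *C) {
//   return createSchedLive<CustomStrategy>(C);
// }
//
// ScheduleDAGInstrs *<Target>TargetMachine::
// createPostMachineScheduler(MachineSchedContext *C) {
//   return createSchedPostRA(C);
// }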
1436 
1437 class MachineSchedulerPass : public PassInfoMixin<MachineSchedulerPass> {
1438   // FIXME: Remove this member once RegisterClassInfo is queryable as an
1439   // analysis.
1440   std::unique_ptr<impl_detail::MachineSchedulerImpl> Impl;
1441   const TargetMachine *TM;
1442 
1443 public:
1444   LLVM_ABI MachineSchedulerPass(const TargetMachine *TM);
1445   LLVM_ABI MachineSchedulerPass(MachineSchedulerPass &&Other);
1446   LLVM_ABI ~MachineSchedulerPass();
1447   LLVM_ABI PreservedAnalyses run(MachineFunction &MF,
1448                                  MachineFunctionAnalysisManager &MFAM);
1449 };
1450 
1451 class PostMachineSchedulerPass
1452     : public PassInfoMixin<PostMachineSchedulerPass> {
1453   // FIXME: Remove this member once RegisterClassInfo is queryable as an
1454   // analysis.
1455   std::unique_ptr<impl_detail::PostMachineSchedulerImpl> Impl;
1456   const TargetMachine *TM;
1457 
1458 public:
1459   LLVM_ABI PostMachineSchedulerPass(const TargetMachine *TM);
1460   LLVM_ABI PostMachineSchedulerPass(PostMachineSchedulerPass &&Other);
1461   LLVM_ABI ~PostMachineSchedulerPass();
1462   LLVM_ABI PreservedAnalyses run(MachineFunction &MF,
1463                                  MachineFunctionAnalysisManager &MFAM);
1464 };
1465 } // end namespace llvm
1466 
1467 #endif // LLVM_CODEGEN_MACHINESCHEDULER_H
1468