1 //===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides an interface for customizing the standard MachineScheduler
10 // pass. Note that the entire pass may be replaced as follows:
11 //
12 // <Target>TargetMachine::createPassConfig(PassManagerBase &PM) {
13 // PM.substitutePass(&MachineSchedulerID, &CustomSchedulerPassID);
14 // ...}
15 //
16 // The MachineScheduler pass is only responsible for choosing the regions to be
17 // scheduled. Targets can override the DAG builder and scheduler without
18 // replacing the pass as follows:
19 //
20 // ScheduleDAGInstrs *<Target>TargetMachine::
21 // createMachineScheduler(MachineSchedContext *C) {
22 // return new CustomMachineScheduler(C);
23 // }
24 //
25 // The default scheduler, ScheduleDAGMILive, builds the DAG and drives list
26 // scheduling while updating the instruction stream, register pressure, and live
27 // intervals. Most targets don't need to override the DAG builder and list
28 // scheduler, but subtargets that require custom scheduling heuristics may
// plug in an alternate MachineSchedStrategy. The strategy is responsible for
30 // selecting the highest priority node from the list:
31 //
32 // ScheduleDAGInstrs *<Target>TargetMachine::
33 // createMachineScheduler(MachineSchedContext *C) {
34 // return new ScheduleDAGMILive(C, CustomStrategy(C));
35 // }
36 //
37 // The DAG builder can also be customized in a sense by adding DAG mutations
38 // that will run after DAG building and before list scheduling. DAG mutations
39 // can adjust dependencies based on target-specific knowledge or add weak edges
40 // to aid heuristics:
41 //
42 // ScheduleDAGInstrs *<Target>TargetMachine::
43 // createMachineScheduler(MachineSchedContext *C) {
44 // ScheduleDAGMI *DAG = createSchedLive(C);
45 // DAG->addMutation(new CustomDAGMutation(...));
46 // return DAG;
47 // }
48 //
49 // A target that supports alternative schedulers can use the
50 // MachineSchedRegistry to allow command line selection. This can be done by
51 // implementing the following boilerplate:
52 //
53 // static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) {
54 // return new CustomMachineScheduler(C);
55 // }
56 // static MachineSchedRegistry
57 // SchedCustomRegistry("custom", "Run my target's custom scheduler",
58 // createCustomMachineSched);
59 //
60 //
61 // Finally, subtargets that don't need to implement custom heuristics but would
62 // like to configure the GenericScheduler's policy for a given scheduler region,
63 // including scheduling direction and register pressure tracking policy, can do
64 // this:
65 //
66 // void <SubTarget>Subtarget::
67 // overrideSchedPolicy(MachineSchedPolicy &Policy,
68 // unsigned NumRegionInstrs) const {
69 // Policy.<Flag> = true;
70 // }
71 //
72 //===----------------------------------------------------------------------===//
73
74 #ifndef LLVM_CODEGEN_MACHINESCHEDULER_H
75 #define LLVM_CODEGEN_MACHINESCHEDULER_H
76
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <functional>
#include <list>
#include <llvm/Support/raw_ostream.h>
#include <memory>
#include <string>
#include <utility>
#include <vector>
100
101 namespace llvm {
102 namespace impl_detail {
103 // FIXME: Remove these declarations once RegisterClassInfo is queryable as an
104 // analysis.
105 class MachineSchedulerImpl;
106 class PostMachineSchedulerImpl;
107 } // namespace impl_detail
108
namespace MISched {
/// Candidate directions in which a scheduling region may be processed.
/// Unspecified means no direction has been requested.
enum Direction {
  Unspecified,
  TopDown,
  BottomUp,
  Bidirectional,
};
} // namespace MISched
117
118 LLVM_ABI extern cl::opt<MISched::Direction> PreRADirection;
119 LLVM_ABI extern cl::opt<bool> VerifyScheduling;
120 #ifndef NDEBUG
121 extern cl::opt<bool> ViewMISchedDAGs;
122 extern cl::opt<bool> PrintDAGs;
123 #else
124 LLVM_ABI extern const bool ViewMISchedDAGs;
125 LLVM_ABI extern const bool PrintDAGs;
126 #endif
127
128 class AAResults;
129 class LiveIntervals;
130 class MachineDominatorTree;
131 class MachineFunction;
132 class MachineInstr;
133 class MachineLoopInfo;
134 class RegisterClassInfo;
135 class SchedDFSResult;
136 class ScheduleHazardRecognizer;
137 class TargetInstrInfo;
138 class TargetPassConfig;
139 class TargetRegisterInfo;
140
141 /// MachineSchedContext provides enough context from the MachineScheduler pass
142 /// for the target to instantiate a scheduler.
143 struct LLVM_ABI MachineSchedContext {
144 MachineFunction *MF = nullptr;
145 const MachineLoopInfo *MLI = nullptr;
146 const MachineDominatorTree *MDT = nullptr;
147 const TargetMachine *TM = nullptr;
148 AAResults *AA = nullptr;
149 LiveIntervals *LIS = nullptr;
150
151 RegisterClassInfo *RegClassInfo;
152
153 MachineSchedContext();
154 MachineSchedContext &operator=(const MachineSchedContext &other) = delete;
155 MachineSchedContext(const MachineSchedContext &other) = delete;
156 virtual ~MachineSchedContext();
157 };
158
159 /// MachineSchedRegistry provides a selection of available machine instruction
160 /// schedulers.
161 class MachineSchedRegistry
162 : public MachinePassRegistryNode<
163 ScheduleDAGInstrs *(*)(MachineSchedContext *)> {
164 public:
165 using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *);
166
167 // RegisterPassParser requires a (misnamed) FunctionPassCtor type.
168 using FunctionPassCtor = ScheduleDAGCtor;
169
170 LLVM_ABI static MachinePassRegistry<ScheduleDAGCtor> Registry;
171
MachineSchedRegistry(const char * N,const char * D,ScheduleDAGCtor C)172 MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
173 : MachinePassRegistryNode(N, D, C) {
174 Registry.Add(this);
175 }
176
~MachineSchedRegistry()177 ~MachineSchedRegistry() { Registry.Remove(this); }
178
179 // Accessors.
180 //
getNext()181 MachineSchedRegistry *getNext() const {
182 return (MachineSchedRegistry *)MachinePassRegistryNode::getNext();
183 }
184
getList()185 static MachineSchedRegistry *getList() {
186 return (MachineSchedRegistry *)Registry.getList();
187 }
188
setListener(MachinePassRegistryListener<FunctionPassCtor> * L)189 static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) {
190 Registry.setListener(L);
191 }
192 };
193
194 class ScheduleDAGMI;
195
/// Define a generic scheduling policy for targets that don't provide their own
/// MachineSchedStrategy. This can be overridden for each scheduling region
/// before building the DAG.
struct MachineSchedPolicy {
  /// Allow the scheduler to disable register pressure tracking.
  bool ShouldTrackPressure = false;
  /// Track LaneMasks to allow reordering of independent subregister writes
  /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks()
  bool ShouldTrackLaneMasks = false;

  /// Allow the scheduler to force top-down or bottom-up scheduling. If neither
  /// is true, the scheduler runs in both directions and converges.
  bool OnlyTopDown = false;
  bool OnlyBottomUp = false;

  /// Disable heuristic that tries to fetch nodes from long dependency chains
  /// first.
  bool DisableLatencyHeuristic = false;

  /// Compute DFSResult for use in scheduling heuristics.
  bool ComputeDFSResult = false;

  MachineSchedPolicy() = default;
};
220
221 /// MachineSchedStrategy - Interface to the scheduling algorithm used by
222 /// ScheduleDAGMI.
223 ///
224 /// Initialization sequence:
225 /// initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots
226 class LLVM_ABI MachineSchedStrategy {
227 virtual void anchor();
228
229 public:
230 virtual ~MachineSchedStrategy() = default;
231
232 /// Optionally override the per-region scheduling policy.
initPolicy(MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,unsigned NumRegionInstrs)233 virtual void initPolicy(MachineBasicBlock::iterator Begin,
234 MachineBasicBlock::iterator End,
235 unsigned NumRegionInstrs) {}
236
getPolicy()237 virtual MachineSchedPolicy getPolicy() const { return {}; }
dumpPolicy()238 virtual void dumpPolicy() const {}
239
240 /// Check if pressure tracking is needed before building the DAG and
241 /// initializing this strategy. Called after initPolicy.
shouldTrackPressure()242 virtual bool shouldTrackPressure() const { return true; }
243
244 /// Returns true if lanemasks should be tracked. LaneMask tracking is
245 /// necessary to reorder independent subregister defs for the same vreg.
246 /// This has to be enabled in combination with shouldTrackPressure().
shouldTrackLaneMasks()247 virtual bool shouldTrackLaneMasks() const { return false; }
248
249 // If this method returns true, handling of the scheduling regions
250 // themselves (in case of a scheduling boundary in MBB) will be done
251 // beginning with the topmost region of MBB.
doMBBSchedRegionsTopDown()252 virtual bool doMBBSchedRegionsTopDown() const { return false; }
253
254 /// Initialize the strategy after building the DAG for a new region.
255 virtual void initialize(ScheduleDAGMI *DAG) = 0;
256
257 /// Tell the strategy that MBB is about to be processed.
enterMBB(MachineBasicBlock * MBB)258 virtual void enterMBB(MachineBasicBlock *MBB) {};
259
260 /// Tell the strategy that current MBB is done.
leaveMBB()261 virtual void leaveMBB() {};
262
263 /// Notify this strategy that all roots have been released (including those
264 /// that depend on EntrySU or ExitSU).
registerRoots()265 virtual void registerRoots() {}
266
267 /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
268 /// schedule the node at the top of the unscheduled region. Otherwise it will
269 /// be scheduled at the bottom.
270 virtual SUnit *pickNode(bool &IsTopNode) = 0;
271
272 /// Scheduler callback to notify that a new subtree is scheduled.
scheduleTree(unsigned SubtreeID)273 virtual void scheduleTree(unsigned SubtreeID) {}
274
275 /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an
276 /// instruction and updated scheduled/remaining flags in the DAG nodes.
277 virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
278
279 /// When all predecessor dependencies have been resolved, free this node for
280 /// top-down scheduling.
281 virtual void releaseTopNode(SUnit *SU) = 0;
282
283 /// When all successor dependencies have been resolved, free this node for
284 /// bottom-up scheduling.
285 virtual void releaseBottomNode(SUnit *SU) = 0;
286 };
287
/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply
/// schedules machine instructions according to the given MachineSchedStrategy
/// without much extra book-keeping. This is the common functionality between
/// PreRA and PostRA MachineScheduler.
class LLVM_ABI ScheduleDAGMI : public ScheduleDAGInstrs {
protected:
  // Alias analysis and live intervals, taken from the pass context in the
  // constructor below.
  AAResults *AA;
  LiveIntervals *LIS;
  // Strategy consulted for scheduling decisions (see e.g.
  // doMBBSchedRegionsTopDown below, which delegates to it).
  std::unique_ptr<MachineSchedStrategy> SchedImpl;

  /// Ordered list of DAG postprocessing steps.
  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;

  /// The top of the unscheduled zone.
  MachineBasicBlock::iterator CurrentTop;

  /// The bottom of the unscheduled zone.
  MachineBasicBlock::iterator CurrentBottom;

#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  /// The number of instructions scheduled so far. Used to cut off the
  /// scheduler at the point determined by misched-cutoff.
  unsigned NumInstrsScheduled = 0;
#endif

public:
  ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
                bool RemoveKillFlags)
      : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA),
        LIS(C->LIS), SchedImpl(std::move(S)) {}

  // Provide a vtable anchor
  ~ScheduleDAGMI() override;

  /// If this method returns true, handling of the scheduling regions
  /// themselves (in case of a scheduling boundary in MBB) will be done
  /// beginning with the topmost region of MBB.
  bool doMBBSchedRegionsTopDown() const override {
    return SchedImpl->doMBBSchedRegionsTopDown();
  }

  // Returns LiveIntervals instance for use in DAG mutators and such.
  LiveIntervals *getLIS() const { return LIS; }

  /// Return true if this DAG supports VReg liveness and RegPressure.
  virtual bool hasVRegLiveness() const { return false; }

  /// Add a postprocessing step to the DAG builder.
  /// Mutations are applied in the order that they are added after normal DAG
  /// building and before MachineSchedStrategy initialization.
  ///
  /// ScheduleDAGMI takes ownership of the Mutation object.
  /// A null Mutation is silently ignored.
  void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
    if (Mutation)
      Mutations.push_back(std::move(Mutation));
  }

  // Boundaries of the zone of instructions not yet scheduled.
  MachineBasicBlock::iterator top() const { return CurrentTop; }
  MachineBasicBlock::iterator bottom() const { return CurrentBottom; }

  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
  /// region. This covers all instructions in a block, while schedule() may only
  /// cover a subset.
  void enterRegion(MachineBasicBlock *bb,
                   MachineBasicBlock::iterator begin,
                   MachineBasicBlock::iterator end,
                   unsigned regioninstrs) override;

  /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
  /// reorderable instructions.
  void schedule() override;

  void startBlock(MachineBasicBlock *bb) override;
  void finishBlock() override;

  /// Change the position of an instruction within the basic block and update
  /// live ranges and region boundary iterators.
  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);

  void viewGraph(const Twine &Name, const Twine &Title) override;
  void viewGraph() override;

protected:
  // Top-Level entry points for the schedule() driver...

  /// Apply each ScheduleDAGMutation step in order. This allows different
  /// instances of ScheduleDAGMI to perform custom DAG postprocessing.
  void postProcessDAG();

  /// Release ExitSU predecessors and setup scheduler queues.
  void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);

  /// Update scheduler DAG and queues after scheduling an instruction.
  void updateQueues(SUnit *SU, bool IsTopNode);

  /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
  void placeDebugValues();

  /// dump the scheduled Sequence.
  void dumpSchedule() const;
  /// Print execution trace of the schedule top-down or bottom-up.
  void dumpScheduleTraceTopDown() const;
  void dumpScheduleTraceBottomUp() const;

  // Lesser helpers...
  bool checkSchedLimit();

  void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                             SmallVectorImpl<SUnit*> &BotRoots);

  void releaseSucc(SUnit *SU, SDep *SuccEdge);
  void releaseSuccessors(SUnit *SU);
  void releasePred(SUnit *SU, SDep *PredEdge);
  void releasePredecessors(SUnit *SU);
};
403
/// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules
/// machine instructions while updating LiveIntervals and tracking regpressure.
class LLVM_ABI ScheduleDAGMILive : public ScheduleDAGMI {
protected:
  RegisterClassInfo *RegClassInfo;

  /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees
  /// will be empty.
  SchedDFSResult *DFSResult = nullptr;
  BitVector ScheduledTrees;

  MachineBasicBlock::iterator LiveRegionEnd;

  /// Maps vregs to the SUnits of their uses in the current scheduling region.
  VReg2SUnitMultiMap VRegUses;

  // Map each SU to its summary of pressure changes. This array is updated for
  // liveness during bottom-up scheduling. Top-down scheduling may proceed but
  // has no effect on the pressure diffs.
  PressureDiffs SUPressureDiffs;

  /// Register pressure in this region computed by initRegPressure.
  bool ShouldTrackPressure = false;
  bool ShouldTrackLaneMasks = false;
  IntervalPressure RegPressure;
  RegPressureTracker RPTracker;

  /// List of pressure sets that exceed the target's pressure limit before
  /// scheduling, listed in increasing set ID order. Each pressure set is paired
  /// with its max pressure in the currently scheduled regions.
  std::vector<PressureChange> RegionCriticalPSets;

  /// The top of the unscheduled zone.
  IntervalPressure TopPressure;
  RegPressureTracker TopRPTracker;

  /// The bottom of the unscheduled zone.
  IntervalPressure BotPressure;
  RegPressureTracker BotRPTracker;

public:
  // Kill flags must be preserved here since the pre-RA scheduler updates
  // liveness rather than recomputing it.
  ScheduleDAGMILive(MachineSchedContext *C,
                    std::unique_ptr<MachineSchedStrategy> S)
      : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false),
        RegClassInfo(C->RegClassInfo), RPTracker(RegPressure),
        TopRPTracker(TopPressure), BotRPTracker(BotPressure) {}

  ~ScheduleDAGMILive() override;

  /// Return true if this DAG supports VReg liveness and RegPressure.
  bool hasVRegLiveness() const override { return true; }

  /// Return true if register pressure tracking is enabled.
  bool isTrackingPressure() const { return ShouldTrackPressure; }

  /// Get current register pressure for the top scheduled instructions.
  const IntervalPressure &getTopPressure() const { return TopPressure; }
  const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }

  /// Get current register pressure for the bottom scheduled instructions.
  const IntervalPressure &getBotPressure() const { return BotPressure; }
  const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }

  /// Get register pressure for the entire scheduling region before scheduling.
  const IntervalPressure &getRegPressure() const { return RegPressure; }

  const std::vector<PressureChange> &getRegionCriticalPSets() const {
    return RegionCriticalPSets;
  }

  PressureDiff &getPressureDiff(const SUnit *SU) {
    return SUPressureDiffs[SU->NodeNum];
  }
  const PressureDiff &getPressureDiff(const SUnit *SU) const {
    return SUPressureDiffs[SU->NodeNum];
  }

  /// Compute a DFSResult after DAG building is complete, and before any
  /// queue comparisons.
  void computeDFSResult();

  /// Return a non-null DFS result if the scheduling strategy initialized it.
  const SchedDFSResult *getDFSResult() const { return DFSResult; }

  BitVector &getScheduledTrees() { return ScheduledTrees; }

  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
  /// region. This covers all instructions in a block, while schedule() may only
  /// cover a subset.
  void enterRegion(MachineBasicBlock *bb,
                   MachineBasicBlock::iterator begin,
                   MachineBasicBlock::iterator end,
                   unsigned regioninstrs) override;

  /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
  /// reorderable instructions.
  void schedule() override;

  /// Compute the cyclic critical path through the DAG.
  unsigned computeCyclicCriticalPath();

  void dump() const override;

protected:
  // Top-Level entry points for the schedule() driver...

  /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
  /// enabled. This sets up three trackers. RPTracker will cover the entire DAG
  /// region, TopTracker and BottomTracker will be initialized to the top and
  /// bottom of the DAG region without covering any unscheduled instruction.
  void buildDAGWithRegPressure();

  /// Release ExitSU predecessors and setup scheduler queues. Re-position
  /// the Top RP tracker in case the region beginning has changed.
  void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);

  /// Move an instruction and update register pressure.
  void scheduleMI(SUnit *SU, bool IsTopNode);

  // Lesser helpers...

  void initRegPressure();

  void updatePressureDiffs(ArrayRef<VRegMaskOrUnit> LiveUses);

  void updateScheduledPressure(const SUnit *SU,
                               const std::vector<unsigned> &NewMaxPressure);

  void collectVRegUses(SUnit &SU);
};
534
535 //===----------------------------------------------------------------------===//
536 ///
537 /// Helpers for implementing custom MachineSchedStrategy classes. These take
538 /// care of the book-keeping associated with list scheduling heuristics.
539 ///
540 //===----------------------------------------------------------------------===//
541
542 /// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience
543 /// methods for pushing and removing nodes. ReadyQueue's are uniquely identified
544 /// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
545 ///
546 /// This is a convenience class that may be used by implementations of
547 /// MachineSchedStrategy.
548 class ReadyQueue {
549 unsigned ID;
550 std::string Name;
551 std::vector<SUnit*> Queue;
552
553 public:
ReadyQueue(unsigned id,const Twine & name)554 ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
555
getID()556 unsigned getID() const { return ID; }
557
getName()558 StringRef getName() const { return Name; }
559
560 // SU is in this queue if it's NodeQueueID is a superset of this ID.
isInQueue(SUnit * SU)561 bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
562
empty()563 bool empty() const { return Queue.empty(); }
564
clear()565 void clear() { Queue.clear(); }
566
size()567 unsigned size() const { return Queue.size(); }
568
569 using iterator = std::vector<SUnit*>::iterator;
570
begin()571 iterator begin() { return Queue.begin(); }
572
end()573 iterator end() { return Queue.end(); }
574
elements()575 ArrayRef<SUnit*> elements() { return Queue; }
576
find(SUnit * SU)577 iterator find(SUnit *SU) { return llvm::find(Queue, SU); }
578
push(SUnit * SU)579 void push(SUnit *SU) {
580 Queue.push_back(SU);
581 SU->NodeQueueId |= ID;
582 }
583
remove(iterator I)584 iterator remove(iterator I) {
585 (*I)->NodeQueueId &= ~ID;
586 *I = Queue.back();
587 unsigned idx = I - Queue.begin();
588 Queue.pop_back();
589 return Queue.begin() + idx;
590 }
591
592 LLVM_ABI void dump() const;
593 };
594
595 /// Summarize the unscheduled region.
596 struct SchedRemainder {
597 // Critical path through the DAG in expected latency.
598 unsigned CriticalPath;
599 unsigned CyclicCritPath;
600
601 // Scaled count of micro-ops left to schedule.
602 unsigned RemIssueCount;
603
604 bool IsAcyclicLatencyLimited;
605
606 // Unscheduled resources
607 SmallVector<unsigned, 16> RemainingCounts;
608
SchedRemainderSchedRemainder609 SchedRemainder() { reset(); }
610
resetSchedRemainder611 void reset() {
612 CriticalPath = 0;
613 CyclicCritPath = 0;
614 RemIssueCount = 0;
615 IsAcyclicLatencyLimited = false;
616 RemainingCounts.clear();
617 }
618
619 LLVM_ABI void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
620 };
621
622 /// ResourceSegments are a collection of intervals closed on the
623 /// left and opened on the right:
624 ///
625 /// list{ [a1, b1), [a2, b2), ..., [a_N, b_N) }
626 ///
627 /// The collection has the following properties:
628 ///
629 /// 1. The list is ordered: a_i < b_i and b_i < a_(i+1)
630 ///
631 /// 2. The intervals in the collection do not intersect each other.
632 ///
633 /// A \ref ResourceSegments instance represents the cycle
634 /// reservation history of the instance of and individual resource.
635 class ResourceSegments {
636 public:
637 /// Represents an interval of discrete integer values closed on
638 /// the left and open on the right: [a, b).
639 typedef std::pair<int64_t, int64_t> IntervalTy;
640
641 /// Adds an interval [a, b) to the collection of the instance.
642 ///
643 /// When adding [a, b[ to the collection, the operation merges the
644 /// adjacent intervals. For example
645 ///
646 /// 0 1 2 3 4 5 6 7 8 9 10
647 /// [-----) [--) [--)
648 /// + [--)
649 /// = [-----------) [--)
650 ///
651 /// To be able to debug duplicate resource usage, the function has
652 /// assertion that checks that no interval should be added if it
653 /// overlaps any of the intervals in the collection. We can
654 /// require this because by definition a \ref ResourceSegments is
655 /// attached only to an individual resource instance.
656 LLVM_ABI void add(IntervalTy A, const unsigned CutOff = 10);
657
658 public:
659 /// Checks whether intervals intersect.
660 LLVM_ABI static bool intersects(IntervalTy A, IntervalTy B);
661
662 /// These function return the interval used by a resource in bottom and top
663 /// scheduling.
664 ///
665 /// Consider an instruction that uses resources X0, X1 and X2 as follows:
666 ///
667 /// X0 X1 X1 X2 +--------+-------------+--------------+
668 /// |Resource|AcquireAtCycle|ReleaseAtCycle|
669 /// +--------+-------------+--------------+
670 /// | X0 | 0 | 1 |
671 /// +--------+-------------+--------------+
672 /// | X1 | 1 | 3 |
673 /// +--------+-------------+--------------+
674 /// | X2 | 3 | 4 |
675 /// +--------+-------------+--------------+
676 ///
677 /// If we can schedule the instruction at cycle C, we need to
678 /// compute the interval of the resource as follows:
679 ///
680 /// # TOP DOWN SCHEDULING
681 ///
682 /// Cycles scheduling flows to the _right_, in the same direction
683 /// of time.
684 ///
685 /// C 1 2 3 4 5 ...
686 /// ------|------|------|------|------|------|----->
687 /// X0 X1 X1 X2 ---> direction of time
688 /// X0 [C, C+1)
689 /// X1 [C+1, C+3)
690 /// X2 [C+3, C+4)
691 ///
692 /// Therefore, the formula to compute the interval for a resource
693 /// of an instruction that can be scheduled at cycle C in top-down
694 /// scheduling is:
695 ///
696 /// [C+AcquireAtCycle, C+ReleaseAtCycle)
697 ///
698 ///
699 /// # BOTTOM UP SCHEDULING
700 ///
701 /// Cycles scheduling flows to the _left_, in opposite direction
702 /// of time.
703 ///
704 /// In bottom up scheduling, the scheduling happens in opposite
705 /// direction to the execution of the cycles of the
706 /// instruction. When the instruction is scheduled at cycle `C`,
707 /// the resources are allocated in the past relative to `C`:
708 ///
709 /// 2 1 C -1 -2 -3 -4 -5 ...
710 /// <-----|------|------|------|------|------|------|------|---
711 /// X0 X1 X1 X2 ---> direction of time
712 /// X0 (C+1, C]
713 /// X1 (C, C-2]
714 /// X2 (C-2, C-3]
715 ///
716 /// Therefore, the formula to compute the interval for a resource
717 /// of an instruction that can be scheduled at cycle C in bottom-up
718 /// scheduling is:
719 ///
720 /// [C-ReleaseAtCycle+1, C-AcquireAtCycle+1)
721 ///
722 ///
723 /// NOTE: In both cases, the number of cycles booked by a
724 /// resources is the value (ReleaseAtCycle - AcquireAtCycle).
getResourceIntervalBottom(unsigned C,unsigned AcquireAtCycle,unsigned ReleaseAtCycle)725 static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle,
726 unsigned ReleaseAtCycle) {
727 return std::make_pair<long, long>((long)C - (long)ReleaseAtCycle + 1L,
728 (long)C - (long)AcquireAtCycle + 1L);
729 }
getResourceIntervalTop(unsigned C,unsigned AcquireAtCycle,unsigned ReleaseAtCycle)730 static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle,
731 unsigned ReleaseAtCycle) {
732 return std::make_pair<long, long>((long)C + (long)AcquireAtCycle,
733 (long)C + (long)ReleaseAtCycle);
734 }
735
736 private:
737 /// Finds the first cycle in which a resource can be allocated.
738 ///
739 /// The function uses the \param IntervalBuider [*] to build a
740 /// resource interval [a, b[ out of the input parameters \param
741 /// CurrCycle, \param AcquireAtCycle and \param ReleaseAtCycle.
742 ///
743 /// The function then loops through the intervals in the ResourceSegments
744 /// and shifts the interval [a, b[ and the ReturnCycle to the
745 /// right until there is no intersection between the intervals of
746 /// the \ref ResourceSegments instance and the new shifted [a, b[. When
747 /// this condition is met, the ReturnCycle (which
748 /// correspond to the cycle in which the resource can be
749 /// allocated) is returned.
750 ///
751 /// c = CurrCycle in input
752 /// c 1 2 3 4 5 6 7 8 9 10 ... ---> (time
753 /// flow)
754 /// ResourceSegments... [---) [-------) [-----------)
755 /// c [1 3[ -> AcquireAtCycle=1, ReleaseAtCycle=3
756 /// ++c [1 3)
757 /// ++c [1 3)
758 /// ++c [1 3)
759 /// ++c [1 3)
760 /// ++c [1 3) ---> returns c
761 /// incremented by 5 (c+5)
762 ///
763 ///
764 /// Notice that for bottom-up scheduling the diagram is slightly
765 /// different because the current cycle c is always on the right
766 /// of the interval [a, b) (see \ref
767 /// `getResourceIntervalBottom`). This is because the cycle
768 /// increments for bottom-up scheduling moved in the direction
769 /// opposite to the direction of time:
770 ///
771 /// --------> direction of time.
772 /// XXYZZZ (resource usage)
773 /// --------> direction of top-down execution cycles.
774 /// <-------- direction of bottom-up execution cycles.
775 ///
776 /// Even though bottom-up scheduling moves against the flow of
777 /// time, the algorithm used to find the first free slot in between
778 /// intervals is the same as for top-down scheduling.
779 ///
780 /// [*] See \ref `getResourceIntervalTop` and
781 /// \ref `getResourceIntervalBottom` to see how such resource intervals
782 /// are built.
783 LLVM_ABI unsigned getFirstAvailableAt(
784 unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
785 std::function<IntervalTy(unsigned, unsigned, unsigned)> IntervalBuilder)
786 const;
787
788 public:
789 /// getFirstAvailableAtFromBottom and getFirstAvailableAtFromTop
790 /// should be merged in a single function in which a function that
791 /// creates the `NewInterval` is passed as a parameter.
getFirstAvailableAtFromBottom(unsigned CurrCycle,unsigned AcquireAtCycle,unsigned ReleaseAtCycle)792 unsigned getFirstAvailableAtFromBottom(unsigned CurrCycle,
793 unsigned AcquireAtCycle,
794 unsigned ReleaseAtCycle) const {
795 return getFirstAvailableAt(CurrCycle, AcquireAtCycle, ReleaseAtCycle,
796 getResourceIntervalBottom);
797 }
getFirstAvailableAtFromTop(unsigned CurrCycle,unsigned AcquireAtCycle,unsigned ReleaseAtCycle)798 unsigned getFirstAvailableAtFromTop(unsigned CurrCycle,
799 unsigned AcquireAtCycle,
800 unsigned ReleaseAtCycle) const {
801 return getFirstAvailableAt(CurrCycle, AcquireAtCycle, ReleaseAtCycle,
802 getResourceIntervalTop);
803 }
804
private:
  /// The collection of intervals recording resource usage; kept sorted and
  /// with adjacent intervals merged by \ref sortAndMerge.
  std::list<IntervalTy> _Intervals;
  /// Merge all adjacent intervals in the collection. For all pairs
  /// of adjacent intervals, it performs [a, b) + [b, c) -> [a, c).
  ///
  /// Before performing the merge operation, the intervals are
  /// sorted with \ref sort_predicate.
  LLVM_ABI void sortAndMerge();
813
814 public:
815 // constructor for empty set
ResourceSegments()816 explicit ResourceSegments(){};
empty()817 bool empty() const { return _Intervals.empty(); }
ResourceSegments(const std::list<IntervalTy> & Intervals)818 explicit ResourceSegments(const std::list<IntervalTy> &Intervals)
819 : _Intervals(Intervals) {
820 sortAndMerge();
821 }
822
823 friend bool operator==(const ResourceSegments &c1,
824 const ResourceSegments &c2) {
825 return c1._Intervals == c2._Intervals;
826 }
827 friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
828 const ResourceSegments &Segments) {
829 os << "{ ";
830 for (auto p : Segments._Intervals)
831 os << "[" << p.first << ", " << p.second << "), ";
832 os << "}\n";
833 return os;
834 }
835 };
836
837 /// Each Scheduling boundary is associated with ready queues. It tracks the
838 /// current cycle in the direction of movement, and maintains the state
839 /// of "hazards" and other interlocks at the current cycle.
840 class SchedBoundary {
841 public:
842 /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
843 enum {
844 TopQID = 1,
845 BotQID = 2,
846 LogMaxQID = 2
847 };
848
849 ScheduleDAGMI *DAG = nullptr;
850 const TargetSchedModel *SchedModel = nullptr;
851 SchedRemainder *Rem = nullptr;
852
853 ReadyQueue Available;
854 ReadyQueue Pending;
855
856 ScheduleHazardRecognizer *HazardRec = nullptr;
857
858 private:
859 /// True if the pending Q should be checked/updated before scheduling another
860 /// instruction.
861 bool CheckPending;
862
863 /// Number of cycles it takes to issue the instructions scheduled in this
864 /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
865 /// See getStalls().
866 unsigned CurrCycle;
867
868 /// Micro-ops issued in the current cycle
869 unsigned CurrMOps;
870
871 /// MinReadyCycle - Cycle of the soonest available instruction.
872 unsigned MinReadyCycle;
873
874 // The expected latency of the critical path in this scheduled zone.
875 unsigned ExpectedLatency;
876
877 // The latency of dependence chains leading into this zone.
878 // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
879 // For each cycle scheduled: DLat -= 1.
880 unsigned DependentLatency;
881
882 /// Count the scheduled (issued) micro-ops that can be retired by
883 /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
884 unsigned RetiredMOps;
885
886 // Count scheduled resources that have been executed. Resources are
887 // considered executed if they become ready in the time that it takes to
888 // saturate any resource including the one in question. Counts are scaled
889 // for direct comparison with other resources. Counts can be compared with
890 // MOps * getMicroOpFactor and Latency * getLatencyFactor.
891 SmallVector<unsigned, 16> ExecutedResCounts;
892
893 /// Cache the max count for a single resource.
894 unsigned MaxExecutedResCount;
895
896 // Cache the critical resources ID in this scheduled zone.
897 unsigned ZoneCritResIdx;
898
899 // Is the scheduled region resource limited vs. latency limited.
900 bool IsResourceLimited;
901
902 public:
903 private:
904 /// Record how resources have been allocated across the cycles of
905 /// the execution.
906 std::map<unsigned, ResourceSegments> ReservedResourceSegments;
907 std::vector<unsigned> ReservedCycles;
908 /// For each PIdx, stores first index into ReservedResourceSegments that
909 /// corresponds to it.
910 ///
911 /// For example, consider the following 3 resources (ResourceCount =
912 /// 3):
913 ///
914 /// +------------+--------+
915 /// |ResourceName|NumUnits|
916 /// +------------+--------+
917 /// | X | 2 |
918 /// +------------+--------+
919 /// | Y | 3 |
920 /// +------------+--------+
921 /// | Z | 1 |
922 /// +------------+--------+
923 ///
924 /// In this case, the total number of resource instances is 6. The
925 /// vector \ref ReservedResourceSegments will have a slot for each instance.
926 /// The vector \ref ReservedCyclesIndex will track at what index the first
927 /// instance of the resource is found in the vector of \ref
928 /// ReservedResourceSegments:
929 ///
930 /// Indexes of instances in
931 /// ReservedResourceSegments
932 ///
933 /// 0 1 2 3 4 5
934 /// ReservedCyclesIndex[0] = 0; [X0, X1,
935 /// ReservedCyclesIndex[1] = 2; Y0, Y1, Y2
936 /// ReservedCyclesIndex[2] = 5; Z
937 SmallVector<unsigned, 16> ReservedCyclesIndex;
938
939 // For each PIdx, stores the resource group IDs of its subunits
940 SmallVector<APInt, 16> ResourceGroupSubUnitMasks;
941
942 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
943 // Remember the greatest possible stall as an upper bound on the number of
944 // times we should retry the pending queue because of a hazard.
945 unsigned MaxObservedStall;
946 #endif
947
948 public:
949 /// Pending queues extend the ready queues with the same ID and the
950 /// PendingFlag set.
SchedBoundary(unsigned ID,const Twine & Name)951 SchedBoundary(unsigned ID, const Twine &Name):
952 Available(ID, Name+".A"), Pending(ID << LogMaxQID, Name+".P") {
953 reset();
954 }
955 SchedBoundary &operator=(const SchedBoundary &other) = delete;
956 SchedBoundary(const SchedBoundary &other) = delete;
957 LLVM_ABI ~SchedBoundary();
958
959 LLVM_ABI void reset();
960
961 LLVM_ABI void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
962 SchedRemainder *rem);
963
isTop()964 bool isTop() const {
965 return Available.getID() == TopQID;
966 }
967
968 /// Number of cycles to issue the instructions scheduled in this zone.
getCurrCycle()969 unsigned getCurrCycle() const { return CurrCycle; }
970
971 /// Micro-ops issued in the current cycle
getCurrMOps()972 unsigned getCurrMOps() const { return CurrMOps; }
973
974 // The latency of dependence chains leading into this zone.
getDependentLatency()975 unsigned getDependentLatency() const { return DependentLatency; }
976
977 /// Get the number of latency cycles "covered" by the scheduled
978 /// instructions. This is the larger of the critical path within the zone
979 /// and the number of cycles required to issue the instructions.
getScheduledLatency()980 unsigned getScheduledLatency() const {
981 return std::max(ExpectedLatency, CurrCycle);
982 }
983
getUnscheduledLatency(SUnit * SU)984 unsigned getUnscheduledLatency(SUnit *SU) const {
985 return isTop() ? SU->getHeight() : SU->getDepth();
986 }
987
getResourceCount(unsigned ResIdx)988 unsigned getResourceCount(unsigned ResIdx) const {
989 return ExecutedResCounts[ResIdx];
990 }
991
992 /// Get the scaled count of scheduled micro-ops and resources, including
993 /// executed resources.
getCriticalCount()994 unsigned getCriticalCount() const {
995 if (!ZoneCritResIdx)
996 return RetiredMOps * SchedModel->getMicroOpFactor();
997 return getResourceCount(ZoneCritResIdx);
998 }
999
1000 /// Get a scaled count for the minimum execution time of the scheduled
1001 /// micro-ops that are ready to execute by getExecutedCount. Notice the
1002 /// feedback loop.
getExecutedCount()1003 unsigned getExecutedCount() const {
1004 return std::max(CurrCycle * SchedModel->getLatencyFactor(),
1005 MaxExecutedResCount);
1006 }
1007
getZoneCritResIdx()1008 unsigned getZoneCritResIdx() const { return ZoneCritResIdx; }
1009
1010 // Is the scheduled region resource limited vs. latency limited.
isResourceLimited()1011 bool isResourceLimited() const { return IsResourceLimited; }
1012
1013 /// Get the difference between the given SUnit's ready time and the current
1014 /// cycle.
1015 LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU);
1016
1017 LLVM_ABI unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
1018 unsigned ReleaseAtCycle,
1019 unsigned AcquireAtCycle);
1020
1021 LLVM_ABI std::pair<unsigned, unsigned>
1022 getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
1023 unsigned ReleaseAtCycle, unsigned AcquireAtCycle);
1024
isUnbufferedGroup(unsigned PIdx)1025 bool isUnbufferedGroup(unsigned PIdx) const {
1026 return SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin &&
1027 !SchedModel->getProcResource(PIdx)->BufferSize;
1028 }
1029
1030 LLVM_ABI bool checkHazard(SUnit *SU);
1031
1032 LLVM_ABI unsigned findMaxLatency(ArrayRef<SUnit *> ReadySUs);
1033
1034 LLVM_ABI unsigned getOtherResourceCount(unsigned &OtherCritIdx);
1035
1036 /// Release SU to make it ready. If it's not in hazard, remove it from
1037 /// pending queue (if already in) and push into available queue.
1038 /// Otherwise, push the SU into pending queue.
1039 ///
1040 /// @param SU The unit to be released.
1041 /// @param ReadyCycle Until which cycle the unit is ready.
1042 /// @param InPQueue Whether SU is already in pending queue.
1043 /// @param Idx Position offset in pending queue (if in it).
1044 LLVM_ABI void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
1045 unsigned Idx = 0);
1046
1047 LLVM_ABI void bumpCycle(unsigned NextCycle);
1048
1049 LLVM_ABI void incExecutedResources(unsigned PIdx, unsigned Count);
1050
1051 LLVM_ABI unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx,
1052 unsigned Cycles, unsigned ReadyCycle,
1053 unsigned StartAtCycle);
1054
1055 LLVM_ABI void bumpNode(SUnit *SU);
1056
1057 LLVM_ABI void releasePending();
1058
1059 LLVM_ABI void removeReady(SUnit *SU);
1060
1061 /// Call this before applying any other heuristics to the Available queue.
1062 /// Updates the Available/Pending Q's if necessary and returns the single
1063 /// available instruction, or NULL if there are multiple candidates.
1064 LLVM_ABI SUnit *pickOnlyChoice();
1065
1066 /// Dump the state of the information that tracks resource usage.
1067 LLVM_ABI void dumpReservedCycles() const;
1068 LLVM_ABI void dumpScheduledState() const;
1069 };
1070
1071 /// Base class for GenericScheduler. This class maintains information about
1072 /// scheduling candidates based on TargetSchedModel making it easy to implement
1073 /// heuristics for either preRA or postRA scheduling.
1074 class GenericSchedulerBase : public MachineSchedStrategy {
1075 public:
1076 /// Represent the type of SchedCandidate found within a single queue.
1077 /// pickNodeBidirectional depends on these listed by decreasing priority.
1078 enum CandReason : uint8_t {
1079 NoCand,
1080 Only1,
1081 PhysReg,
1082 RegExcess,
1083 RegCritical,
1084 Stall,
1085 Cluster,
1086 Weak,
1087 RegMax,
1088 ResourceReduce,
1089 ResourceDemand,
1090 BotHeightReduce,
1091 BotPathReduce,
1092 TopDepthReduce,
1093 TopPathReduce,
1094 NodeOrder,
1095 FirstValid
1096 };
1097
1098 #ifndef NDEBUG
1099 static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
1100 #endif
1101
1102 /// Policy for scheduling the next instruction in the candidate's zone.
1103 struct CandPolicy {
1104 bool ReduceLatency = false;
1105 unsigned ReduceResIdx = 0;
1106 unsigned DemandResIdx = 0;
1107
1108 CandPolicy() = default;
1109
1110 bool operator==(const CandPolicy &RHS) const {
1111 return ReduceLatency == RHS.ReduceLatency &&
1112 ReduceResIdx == RHS.ReduceResIdx &&
1113 DemandResIdx == RHS.DemandResIdx;
1114 }
1115 bool operator!=(const CandPolicy &RHS) const {
1116 return !(*this == RHS);
1117 }
1118 };
1119
1120 /// Status of an instruction's critical resource consumption.
1121 struct SchedResourceDelta {
1122 // Count critical resources in the scheduled region required by SU.
1123 unsigned CritResources = 0;
1124
1125 // Count critical resources from another region consumed by SU.
1126 unsigned DemandedResources = 0;
1127
1128 SchedResourceDelta() = default;
1129
1130 bool operator==(const SchedResourceDelta &RHS) const {
1131 return CritResources == RHS.CritResources
1132 && DemandedResources == RHS.DemandedResources;
1133 }
1134 bool operator!=(const SchedResourceDelta &RHS) const {
1135 return !operator==(RHS);
1136 }
1137 };
1138
1139 /// Store the state used by GenericScheduler heuristics, required for the
1140 /// lifetime of one invocation of pickNode().
1141 struct SchedCandidate {
1142 CandPolicy Policy;
1143
1144 // The best SUnit candidate.
1145 SUnit *SU;
1146
1147 // The reason for this candidate.
1148 CandReason Reason;
1149
1150 // Whether this candidate should be scheduled at top/bottom.
1151 bool AtTop;
1152
1153 // Register pressure values for the best candidate.
1154 RegPressureDelta RPDelta;
1155
1156 // Critical resource consumption of the best candidate.
1157 SchedResourceDelta ResDelta;
1158
SchedCandidateSchedCandidate1159 SchedCandidate() { reset(CandPolicy()); }
SchedCandidateSchedCandidate1160 SchedCandidate(const CandPolicy &Policy) { reset(Policy); }
1161
resetSchedCandidate1162 void reset(const CandPolicy &NewPolicy) {
1163 Policy = NewPolicy;
1164 SU = nullptr;
1165 Reason = NoCand;
1166 AtTop = false;
1167 RPDelta = RegPressureDelta();
1168 ResDelta = SchedResourceDelta();
1169 }
1170
isValidSchedCandidate1171 bool isValid() const { return SU; }
1172
1173 // Copy the status of another candidate without changing policy.
setBestSchedCandidate1174 void setBest(SchedCandidate &Best) {
1175 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
1176 SU = Best.SU;
1177 Reason = Best.Reason;
1178 AtTop = Best.AtTop;
1179 RPDelta = Best.RPDelta;
1180 ResDelta = Best.ResDelta;
1181 }
1182
1183 LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG,
1184 const TargetSchedModel *SchedModel);
1185 };
1186
1187 protected:
1188 const MachineSchedContext *Context;
1189 const TargetSchedModel *SchedModel = nullptr;
1190 const TargetRegisterInfo *TRI = nullptr;
1191 unsigned TopIdx = 0;
1192 unsigned BotIdx = 0;
1193 unsigned NumRegionInstrs = 0;
1194
1195 MachineSchedPolicy RegionPolicy;
1196
1197 SchedRemainder Rem;
1198
GenericSchedulerBase(const MachineSchedContext * C)1199 GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {}
1200
1201 LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA,
1202 SchedBoundary &CurrZone, SchedBoundary *OtherZone);
1203
getPolicy()1204 MachineSchedPolicy getPolicy() const override { return RegionPolicy; }
1205
1206 #ifndef NDEBUG
1207 void traceCandidate(const SchedCandidate &Cand);
1208 #endif
1209
1210 private:
1211 bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone,
1212 bool ComputeRemLatency, unsigned &RemLatency) const;
1213 };
1214
// Utility functions used by heuristics in tryCandidate().
//
// The "try" helpers compare a candidate metric against the current best and
// are given a CandReason to record on whichever candidate is preferred —
// see their definitions in MachineScheduler.cpp for the exact tie-breaking.
LLVM_ABI bool tryLess(int TryVal, int CandVal,
                      GenericSchedulerBase::SchedCandidate &TryCand,
                      GenericSchedulerBase::SchedCandidate &Cand,
                      GenericSchedulerBase::CandReason Reason);
LLVM_ABI bool tryGreater(int TryVal, int CandVal,
                         GenericSchedulerBase::SchedCandidate &TryCand,
                         GenericSchedulerBase::SchedCandidate &Cand,
                         GenericSchedulerBase::CandReason Reason);
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
                         GenericSchedulerBase::SchedCandidate &Cand,
                         SchedBoundary &Zone);
LLVM_ABI bool tryPressure(const PressureChange &TryP,
                          const PressureChange &CandP,
                          GenericSchedulerBase::SchedCandidate &TryCand,
                          GenericSchedulerBase::SchedCandidate &Cand,
                          GenericSchedulerBase::CandReason Reason,
                          const TargetRegisterInfo *TRI,
                          const MachineFunction &MF);
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop);
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop);
1236
1237 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance
1238 /// the schedule.
1239 class LLVM_ABI GenericScheduler : public GenericSchedulerBase {
1240 public:
GenericScheduler(const MachineSchedContext * C)1241 GenericScheduler(const MachineSchedContext *C):
1242 GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
1243 Bot(SchedBoundary::BotQID, "BotQ") {}
1244
1245 void initPolicy(MachineBasicBlock::iterator Begin,
1246 MachineBasicBlock::iterator End,
1247 unsigned NumRegionInstrs) override;
1248
1249 void dumpPolicy() const override;
1250
shouldTrackPressure()1251 bool shouldTrackPressure() const override {
1252 return RegionPolicy.ShouldTrackPressure;
1253 }
1254
shouldTrackLaneMasks()1255 bool shouldTrackLaneMasks() const override {
1256 return RegionPolicy.ShouldTrackLaneMasks;
1257 }
1258
1259 void initialize(ScheduleDAGMI *dag) override;
1260
1261 SUnit *pickNode(bool &IsTopNode) override;
1262
1263 void schedNode(SUnit *SU, bool IsTopNode) override;
1264
releaseTopNode(SUnit * SU)1265 void releaseTopNode(SUnit *SU) override {
1266 if (SU->isScheduled)
1267 return;
1268
1269 Top.releaseNode(SU, SU->TopReadyCycle, false);
1270 TopCand.SU = nullptr;
1271 }
1272
releaseBottomNode(SUnit * SU)1273 void releaseBottomNode(SUnit *SU) override {
1274 if (SU->isScheduled)
1275 return;
1276
1277 Bot.releaseNode(SU, SU->BotReadyCycle, false);
1278 BotCand.SU = nullptr;
1279 }
1280
1281 void registerRoots() override;
1282
1283 protected:
1284 ScheduleDAGMILive *DAG = nullptr;
1285
1286 // State of the top and bottom scheduled instruction boundaries.
1287 SchedBoundary Top;
1288 SchedBoundary Bot;
1289
1290 ClusterInfo *TopCluster;
1291 ClusterInfo *BotCluster;
1292
1293 /// Candidate last picked from Top boundary.
1294 SchedCandidate TopCand;
1295 /// Candidate last picked from Bot boundary.
1296 SchedCandidate BotCand;
1297
1298 void checkAcyclicLatency();
1299
1300 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
1301 const RegPressureTracker &RPTracker,
1302 RegPressureTracker &TempTracker);
1303
1304 virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
1305 SchedBoundary *Zone) const;
1306
1307 SUnit *pickNodeBidirectional(bool &IsTopNode);
1308
1309 void pickNodeFromQueue(SchedBoundary &Zone,
1310 const CandPolicy &ZonePolicy,
1311 const RegPressureTracker &RPTracker,
1312 SchedCandidate &Candidate);
1313
1314 void reschedulePhysReg(SUnit *SU, bool isTop);
1315 };
1316
1317 /// PostGenericScheduler - Interface to the scheduling algorithm used by
1318 /// ScheduleDAGMI.
1319 ///
1320 /// Callbacks from ScheduleDAGMI:
1321 /// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
1322 class LLVM_ABI PostGenericScheduler : public GenericSchedulerBase {
1323 protected:
1324 ScheduleDAGMI *DAG = nullptr;
1325 SchedBoundary Top;
1326 SchedBoundary Bot;
1327
1328 /// Candidate last picked from Top boundary.
1329 SchedCandidate TopCand;
1330 /// Candidate last picked from Bot boundary.
1331 SchedCandidate BotCand;
1332
1333 ClusterInfo *TopCluster;
1334 ClusterInfo *BotCluster;
1335
1336 public:
PostGenericScheduler(const MachineSchedContext * C)1337 PostGenericScheduler(const MachineSchedContext *C)
1338 : GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
1339 Bot(SchedBoundary::BotQID, "BotQ") {}
1340
1341 ~PostGenericScheduler() override = default;
1342
1343 void initPolicy(MachineBasicBlock::iterator Begin,
1344 MachineBasicBlock::iterator End,
1345 unsigned NumRegionInstrs) override;
1346
1347 /// PostRA scheduling does not track pressure.
shouldTrackPressure()1348 bool shouldTrackPressure() const override { return false; }
1349
1350 void initialize(ScheduleDAGMI *Dag) override;
1351
1352 void registerRoots() override;
1353
1354 SUnit *pickNode(bool &IsTopNode) override;
1355
1356 SUnit *pickNodeBidirectional(bool &IsTopNode);
1357
scheduleTree(unsigned SubtreeID)1358 void scheduleTree(unsigned SubtreeID) override {
1359 llvm_unreachable("PostRA scheduler does not support subtree analysis.");
1360 }
1361
1362 void schedNode(SUnit *SU, bool IsTopNode) override;
1363
releaseTopNode(SUnit * SU)1364 void releaseTopNode(SUnit *SU) override {
1365 if (SU->isScheduled)
1366 return;
1367 Top.releaseNode(SU, SU->TopReadyCycle, false);
1368 TopCand.SU = nullptr;
1369 }
1370
releaseBottomNode(SUnit * SU)1371 void releaseBottomNode(SUnit *SU) override {
1372 if (SU->isScheduled)
1373 return;
1374 Bot.releaseNode(SU, SU->BotReadyCycle, false);
1375 BotCand.SU = nullptr;
1376 }
1377
1378 protected:
1379 virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
1380
1381 void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand);
1382 };
1383
/// If ReorderWhileClustering is set to true, no attempt will be made to
/// reduce reordering due to load clustering.
LLVM_ABI std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
                             const TargetRegisterInfo *TRI,
                             bool ReorderWhileClustering = false);

/// If ReorderWhileClustering is set to true, no attempt will be made to
/// reduce reordering due to store clustering.
LLVM_ABI std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
                              const TargetRegisterInfo *TRI,
                              bool ReorderWhileClustering = false);

/// DAG mutation constraining copies (registered by createSchedLive below).
/// NOTE(review): exact constraint semantics live in the implementation —
/// confirm there before relying on details.
LLVM_ABI std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
                               const TargetRegisterInfo *TRI);
1401
1402 /// Create the standard converging machine scheduler. This will be used as the
1403 /// default scheduler if the target does not set a default.
1404 /// Adds default DAG mutations.
1405 template <typename Strategy = GenericScheduler>
createSchedLive(MachineSchedContext * C)1406 ScheduleDAGMILive *createSchedLive(MachineSchedContext *C) {
1407 ScheduleDAGMILive *DAG =
1408 new ScheduleDAGMILive(C, std::make_unique<Strategy>(C));
1409 // Register DAG post-processors.
1410 //
1411 // FIXME: extend the mutation API to allow earlier mutations to instantiate
1412 // data and pass it to later mutations. Have a single mutation that gathers
1413 // the interesting nodes in one pass.
1414 DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
1415
1416 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
1417 // Add MacroFusion mutation if fusions are not empty.
1418 const auto &MacroFusions = STI.getMacroFusions();
1419 if (!MacroFusions.empty())
1420 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
1421 return DAG;
1422 }
1423
1424 /// Create a generic scheduler with no vreg liveness or DAG mutation passes.
1425 template <typename Strategy = PostGenericScheduler>
createSchedPostRA(MachineSchedContext * C)1426 ScheduleDAGMI *createSchedPostRA(MachineSchedContext *C) {
1427 ScheduleDAGMI *DAG = new ScheduleDAGMI(C, std::make_unique<Strategy>(C),
1428 /*RemoveKillFlags=*/true);
1429 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
1430 // Add MacroFusion mutation if fusions are not empty.
1431 const auto &MacroFusions = STI.getMacroFusions();
1432 if (!MacroFusions.empty())
1433 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
1434 return DAG;
1435 }
1436
/// New pass-manager wrapper for the machine scheduler (runs on a
/// MachineFunction via the MachineFunctionAnalysisManager).
class MachineSchedulerPass : public PassInfoMixin<MachineSchedulerPass> {
  // FIXME: Remove this member once RegisterClassInfo is queryable as an
  // analysis.
  std::unique_ptr<impl_detail::MachineSchedulerImpl> Impl;
  // Target machine the pass was constructed for (not owned).
  const TargetMachine *TM;

public:
  LLVM_ABI MachineSchedulerPass(const TargetMachine *TM);
  LLVM_ABI MachineSchedulerPass(MachineSchedulerPass &&Other);
  LLVM_ABI ~MachineSchedulerPass();
  LLVM_ABI PreservedAnalyses run(MachineFunction &MF,
                                 MachineFunctionAnalysisManager &MFAM);
};
1450
/// New pass-manager wrapper for the post-RA machine scheduler.
class PostMachineSchedulerPass
    : public PassInfoMixin<PostMachineSchedulerPass> {
  // FIXME: Remove this member once RegisterClassInfo is queryable as an
  // analysis.
  std::unique_ptr<impl_detail::PostMachineSchedulerImpl> Impl;
  // Target machine the pass was constructed for (not owned).
  const TargetMachine *TM;

public:
  LLVM_ABI PostMachineSchedulerPass(const TargetMachine *TM);
  LLVM_ABI PostMachineSchedulerPass(PostMachineSchedulerPass &&Other);
  LLVM_ABI ~PostMachineSchedulerPass();
  LLVM_ABI PreservedAnalyses run(MachineFunction &MF,
                                 MachineFunctionAnalysisManager &MFAM);
};
1465 } // end namespace llvm
1466
1467 #endif // LLVM_CODEGEN_MACHINESCHEDULER_H
1468