xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp (revision cf6044857e2b20f2ecf90929027e9dd335b38803)
1  //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // MachineScheduler schedules machine instructions after phi elimination. It
10  // preserves LiveIntervals so it can be invoked before register allocation.
11  //
12  //===----------------------------------------------------------------------===//
13  
14  #include "llvm/CodeGen/MachineScheduler.h"
15  #include "llvm/ADT/ArrayRef.h"
16  #include "llvm/ADT/BitVector.h"
17  #include "llvm/ADT/DenseMap.h"
18  #include "llvm/ADT/PriorityQueue.h"
19  #include "llvm/ADT/STLExtras.h"
20  #include "llvm/ADT/SmallVector.h"
21  #include "llvm/ADT/Statistic.h"
22  #include "llvm/ADT/iterator_range.h"
23  #include "llvm/Analysis/AliasAnalysis.h"
24  #include "llvm/CodeGen/LiveInterval.h"
25  #include "llvm/CodeGen/LiveIntervals.h"
26  #include "llvm/CodeGen/MachineBasicBlock.h"
27  #include "llvm/CodeGen/MachineDominators.h"
28  #include "llvm/CodeGen/MachineFunction.h"
29  #include "llvm/CodeGen/MachineFunctionPass.h"
30  #include "llvm/CodeGen/MachineInstr.h"
31  #include "llvm/CodeGen/MachineLoopInfo.h"
32  #include "llvm/CodeGen/MachineOperand.h"
33  #include "llvm/CodeGen/MachinePassRegistry.h"
34  #include "llvm/CodeGen/MachineRegisterInfo.h"
35  #include "llvm/CodeGen/RegisterClassInfo.h"
36  #include "llvm/CodeGen/RegisterPressure.h"
37  #include "llvm/CodeGen/ScheduleDAG.h"
38  #include "llvm/CodeGen/ScheduleDAGInstrs.h"
39  #include "llvm/CodeGen/ScheduleDAGMutation.h"
40  #include "llvm/CodeGen/ScheduleDFS.h"
41  #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
42  #include "llvm/CodeGen/SlotIndexes.h"
43  #include "llvm/CodeGen/TargetFrameLowering.h"
44  #include "llvm/CodeGen/TargetInstrInfo.h"
45  #include "llvm/CodeGen/TargetLowering.h"
46  #include "llvm/CodeGen/TargetPassConfig.h"
47  #include "llvm/CodeGen/TargetRegisterInfo.h"
48  #include "llvm/CodeGen/TargetSchedule.h"
49  #include "llvm/CodeGen/TargetSubtargetInfo.h"
50  #include "llvm/Config/llvm-config.h"
51  #include "llvm/InitializePasses.h"
52  #include "llvm/MC/LaneBitmask.h"
53  #include "llvm/Pass.h"
54  #include "llvm/Support/CommandLine.h"
55  #include "llvm/Support/Compiler.h"
56  #include "llvm/Support/Debug.h"
57  #include "llvm/Support/ErrorHandling.h"
58  #include "llvm/Support/GraphWriter.h"
59  #include "llvm/Support/MachineValueType.h"
60  #include "llvm/Support/raw_ostream.h"
61  #include <algorithm>
62  #include <cassert>
63  #include <cstdint>
64  #include <iterator>
65  #include <limits>
66  #include <memory>
67  #include <string>
68  #include <tuple>
69  #include <utility>
70  #include <vector>
71  
72  using namespace llvm;
73  
74  #define DEBUG_TYPE "machine-scheduler"
75  
76  STATISTIC(NumClustered, "Number of load/store pairs clustered");
77  
78  namespace llvm {
79  
80  cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
81                             cl::desc("Force top-down list scheduling"));
82  cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
83                              cl::desc("Force bottom-up list scheduling"));
84  cl::opt<bool>
85  DumpCriticalPathLength("misched-dcpl", cl::Hidden,
86                         cl::desc("Print critical path length to stdout"));
87  
88  cl::opt<bool> VerifyScheduling(
89      "verify-misched", cl::Hidden,
90      cl::desc("Verify machine instrs before and after machine scheduling"));
91  
92  #ifndef NDEBUG
93  cl::opt<bool> ViewMISchedDAGs(
94      "view-misched-dags", cl::Hidden,
95      cl::desc("Pop up a window to show MISched dags after they are processed"));
96  cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
97                          cl::desc("Print schedule DAGs"));
98  #else
99  const bool ViewMISchedDAGs = false;
100  const bool PrintDAGs = false;
101  #endif // NDEBUG
102  
103  } // end namespace llvm
104  
105  #ifndef NDEBUG
106  /// In some situations a few uninteresting nodes depend on nearly all other
107  /// nodes in the graph; provide a cutoff to hide them.
108  static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
109  cl::desc("Hide nodes with more predecessors/successors than cutoff"));
110  
111  static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
112    cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
113  
114  static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
115    cl::desc("Only schedule this function"));
116  static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
117                                          cl::desc("Only schedule this MBB#"));
118  #endif // NDEBUG
119  
120  /// Avoid quadratic complexity in unusually large basic blocks by limiting the
121  /// size of the ready lists.
122  static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
123    cl::desc("Limit ready list to N instructions"), cl::init(256));
124  
125  static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
126    cl::desc("Enable register pressure scheduling."), cl::init(true));
127  
128  static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
129    cl::desc("Enable cyclic critical path analysis."), cl::init(true));
130  
131  static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
132                                          cl::desc("Enable memop clustering."),
133                                          cl::init(true));
134  static cl::opt<bool>
135      ForceFastCluster("force-fast-cluster", cl::Hidden,
136                       cl::desc("Switch to the fast cluster algorithm at the "
137                                "cost of losing some fusion opportunities"),
138                       cl::init(false));
139  static cl::opt<unsigned>
140      FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
141                           cl::desc("The threshold for fast cluster"),
142                           cl::init(1000));
143  
144  // DAG subtrees must have at least this many nodes.
145  static const unsigned MinSubtreeSize = 8;
146  
147  // Pin the vtables to this file.
148  void MachineSchedStrategy::anchor() {}
149  
150  void ScheduleDAGMutation::anchor() {}
151  
152  //===----------------------------------------------------------------------===//
153  // Machine Instruction Scheduling Pass and Registry
154  //===----------------------------------------------------------------------===//
155  
156  MachineSchedContext::MachineSchedContext() {
157    RegClassInfo = new RegisterClassInfo();
158  }
159  
160  MachineSchedContext::~MachineSchedContext() {
161    delete RegClassInfo;
162  }
163  
164  namespace {
165  
166  /// Base class for a machine scheduler that can run at any point.
167  class MachineSchedulerBase : public MachineSchedContext,
168                               public MachineFunctionPass {
169  public:
170    MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
171  
172    void print(raw_ostream &O, const Module* = nullptr) const override;
173  
174  protected:
175    void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
176  };
177  
178  /// MachineScheduler runs after coalescing and before register allocation.
179  class MachineScheduler : public MachineSchedulerBase {
180  public:
181    MachineScheduler();
182  
183    void getAnalysisUsage(AnalysisUsage &AU) const override;
184  
185    bool runOnMachineFunction(MachineFunction&) override;
186  
187    static char ID; // Class identification, replacement for typeinfo
188  
189  protected:
190    ScheduleDAGInstrs *createMachineScheduler();
191  };
192  
193  /// PostMachineScheduler runs after register allocation, shortly before code emission.
194  class PostMachineScheduler : public MachineSchedulerBase {
195  public:
196    PostMachineScheduler();
197  
198    void getAnalysisUsage(AnalysisUsage &AU) const override;
199  
200    bool runOnMachineFunction(MachineFunction&) override;
201  
202    static char ID; // Class identification, replacement for typeinfo
203  
204  protected:
205    ScheduleDAGInstrs *createPostMachineScheduler();
206  };
207  
208  } // end anonymous namespace
209  
210  char MachineScheduler::ID = 0;
211  
212  char &llvm::MachineSchedulerID = MachineScheduler::ID;
213  
214  INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
215                        "Machine Instruction Scheduler", false, false)
216  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
217  INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
218  INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
219  INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
220  INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
221  INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
222                      "Machine Instruction Scheduler", false, false)
223  
224  MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
225    initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
226  }
227  
228  void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
229    AU.setPreservesCFG();
230    AU.addRequired<MachineDominatorTree>();
231    AU.addRequired<MachineLoopInfo>();
232    AU.addRequired<AAResultsWrapperPass>();
233    AU.addRequired<TargetPassConfig>();
234    AU.addRequired<SlotIndexes>();
235    AU.addPreserved<SlotIndexes>();
236    AU.addRequired<LiveIntervals>();
237    AU.addPreserved<LiveIntervals>();
238    MachineFunctionPass::getAnalysisUsage(AU);
239  }
240  
241  char PostMachineScheduler::ID = 0;
242  
243  char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
244  
245  INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
246                        "PostRA Machine Instruction Scheduler", false, false)
247  INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
248  INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
249  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
250  INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
251                      "PostRA Machine Instruction Scheduler", false, false)
252  
253  PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
254    initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
255  }
256  
257  void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
258    AU.setPreservesCFG();
259    AU.addRequired<MachineDominatorTree>();
260    AU.addRequired<MachineLoopInfo>();
261    AU.addRequired<AAResultsWrapperPass>();
262    AU.addRequired<TargetPassConfig>();
263    MachineFunctionPass::getAnalysisUsage(AU);
264  }
265  
266  MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
267      MachineSchedRegistry::Registry;
268  
269  /// A dummy default scheduler factory indicates whether the scheduler
270  /// is overridden on the command line.
271  static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
272    return nullptr;
273  }
274  
275  /// MachineSchedOpt allows command line selection of the scheduler.
276  static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
277                 RegisterPassParser<MachineSchedRegistry>>
278  MachineSchedOpt("misched",
279                  cl::init(&useDefaultMachineSched), cl::Hidden,
280                  cl::desc("Machine instruction scheduler to use"));
281  
282  static MachineSchedRegistry
283  DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
284                       useDefaultMachineSched);
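// As an illustration (hypothetical names; a sketch, not part of this file), a
// target or out-of-tree plugin can make its own scheduler selectable via
// -misched=<name> by registering a factory with the same signature as
// useDefaultMachineSched above, mirroring DefaultSchedRegistry:
//
//   static ScheduleDAGInstrs *createMySchedDAG(MachineSchedContext *C) {
//     // Reuse the stock live-interval DAG with the generic strategy; a real
//     // target would substitute its own MachineSchedStrategy or add mutations.
//     return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//       MySchedRegistry("my-sched", "Illustrative custom scheduler.",
//                       createMySchedDAG);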
285  
286  static cl::opt<bool> EnableMachineSched(
287      "enable-misched",
288      cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
289      cl::Hidden);
290  
291  static cl::opt<bool> EnablePostRAMachineSched(
292      "enable-post-misched",
293      cl::desc("Enable the post-ra machine instruction scheduling pass."),
294      cl::init(true), cl::Hidden);
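// Usage note: runOnMachineFunction below checks getNumOccurrences() on these
// options, so an explicit command-line setting (e.g. "llc -enable-misched=false"
// or "clang -mllvm -enable-post-misched=false") overrides the subtarget's
// enableMachineScheduler() / enablePostRAMachineScheduler() defaults.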
295  
296  /// Decrement this iterator until reaching the top or a non-debug instr.
297  static MachineBasicBlock::const_iterator
298  priorNonDebug(MachineBasicBlock::const_iterator I,
299                MachineBasicBlock::const_iterator Beg) {
300    assert(I != Beg && "reached the top of the region, cannot decrement");
301    while (--I != Beg) {
302      if (!I->isDebugOrPseudoInstr())
303        break;
304    }
305    return I;
306  }
307  
308  /// Non-const version.
309  static MachineBasicBlock::iterator
310  priorNonDebug(MachineBasicBlock::iterator I,
311                MachineBasicBlock::const_iterator Beg) {
312    return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
313        .getNonConstIterator();
314  }
315  
316  /// If this iterator is a debug value, increment until reaching the End or a
317  /// non-debug instruction.
318  static MachineBasicBlock::const_iterator
319  nextIfDebug(MachineBasicBlock::const_iterator I,
320              MachineBasicBlock::const_iterator End) {
321    for(; I != End; ++I) {
322      if (!I->isDebugOrPseudoInstr())
323        break;
324    }
325    return I;
326  }
327  
328  /// Non-const version.
329  static MachineBasicBlock::iterator
330  nextIfDebug(MachineBasicBlock::iterator I,
331              MachineBasicBlock::const_iterator End) {
332    return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
333        .getNonConstIterator();
334  }
335  
336  /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
337  ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
338    // Select the scheduler, or set the default.
339    MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
340    if (Ctor != useDefaultMachineSched)
341      return Ctor(this);
342  
343    // Get the default scheduler set by the target for this function.
344    ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
345    if (Scheduler)
346      return Scheduler;
347  
348    // Default to GenericScheduler.
349    return createGenericSchedLive(this);
350  }
351  
352  /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
353  /// the caller. We don't have a command line option to override the postRA
354  /// scheduler. The Target must configure it.
355  ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
356    // Get the postRA scheduler set by the target for this function.
357    ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
358    if (Scheduler)
359      return Scheduler;
360  
361    // Default to GenericScheduler.
362    return createGenericSchedPostRA(this);
363  }
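// As an illustration (hypothetical target code; a sketch, not part of this
// file), a target customizes post-RA scheduling by overriding the
// TargetPassConfig hook queried above; enabling the pass itself is governed by
// the subtarget's enablePostRAMachineScheduler(). createGenericSchedPostRA is
// the same default used above:
//
//   ScheduleDAGInstrs *
//   MyTargetPassConfig::createPostMachineScheduler(MachineSchedContext *C) const {
//     ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
//     // A real target would typically attach ScheduleDAGMutations here via
//     // DAG->addMutation(...) before returning.
//     return DAG;
//   }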
364  
365  /// Top-level MachineScheduler pass driver.
366  ///
367  /// Visit blocks in function order. Divide each block into scheduling regions
368  /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
369  /// consistent with the DAG builder, which traverses the interior of the
370  /// scheduling regions bottom-up.
371  ///
372  /// This design avoids exposing scheduling boundaries to the DAG builder,
373  /// simplifying the DAG builder's support for "special" target instructions.
374  /// At the same time the design allows target schedulers to operate across
375  /// scheduling boundaries, for example to bundle the boundary instructions
376  /// without reordering them. This creates complexity, because the target
377  /// scheduler must update the RegionBegin and RegionEnd positions cached by
378  /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
379  /// design would be to split blocks at scheduling boundaries, but LLVM has a
380  /// general bias against block splitting purely for implementation simplicity.
381  bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
382    if (skipFunction(mf.getFunction()))
383      return false;
384  
385    if (EnableMachineSched.getNumOccurrences()) {
386      if (!EnableMachineSched)
387        return false;
388    } else if (!mf.getSubtarget().enableMachineScheduler())
389      return false;
390  
391    LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
392  
393    // Initialize the context of the pass.
394    MF = &mf;
395    MLI = &getAnalysis<MachineLoopInfo>();
396    MDT = &getAnalysis<MachineDominatorTree>();
397    PassConfig = &getAnalysis<TargetPassConfig>();
398    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
399  
400    LIS = &getAnalysis<LiveIntervals>();
401  
402    if (VerifyScheduling) {
403      LLVM_DEBUG(LIS->dump());
404      MF->verify(this, "Before machine scheduling.");
405    }
406    RegClassInfo->runOnMachineFunction(*MF);
407  
408    // Instantiate the selected scheduler for this target, function, and
409    // optimization level.
410    std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
411    scheduleRegions(*Scheduler, false);
412  
413    LLVM_DEBUG(LIS->dump());
414    if (VerifyScheduling)
415      MF->verify(this, "After machine scheduling.");
416    return true;
417  }
418  
419  bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
420    if (skipFunction(mf.getFunction()))
421      return false;
422  
423    if (EnablePostRAMachineSched.getNumOccurrences()) {
424      if (!EnablePostRAMachineSched)
425        return false;
426    } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
427      LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
428      return false;
429    }
430    LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
431  
432    // Initialize the context of the pass.
433    MF = &mf;
434    MLI = &getAnalysis<MachineLoopInfo>();
435    PassConfig = &getAnalysis<TargetPassConfig>();
436    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
437  
438    if (VerifyScheduling)
439      MF->verify(this, "Before post machine scheduling.");
440  
441    // Instantiate the selected scheduler for this target, function, and
442    // optimization level.
443    std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
444    scheduleRegions(*Scheduler, true);
445  
446    if (VerifyScheduling)
447      MF->verify(this, "After post machine scheduling.");
448    return true;
449  }
450  
451  /// Return true if the given instruction should not be included in a scheduling
452  /// region.
453  ///
454  /// MachineScheduler does not currently support scheduling across calls. To
455  /// handle calls, the DAG builder needs to be modified to create register
456  /// anti/output dependencies on the registers clobbered by the call's regmask
457  /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
458  /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
459  /// the boundary, but there would be no benefit to postRA scheduling across
460  /// calls this late anyway.
461  static bool isSchedBoundary(MachineBasicBlock::iterator MI,
462                              MachineBasicBlock *MBB,
463                              MachineFunction *MF,
464                              const TargetInstrInfo *TII) {
465    return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
466  }
467  
468  /// A region of an MBB for scheduling.
469  namespace {
470  struct SchedRegion {
471    /// RegionBegin is the first instruction in the scheduling region, and
472    /// RegionEnd is either MBB->end() or the scheduling boundary after the
473    /// last instruction in the scheduling region. These iterators cannot refer
474    /// to instructions outside of the identified scheduling region because
475    /// those may be reordered before scheduling this region.
476    MachineBasicBlock::iterator RegionBegin;
477    MachineBasicBlock::iterator RegionEnd;
478    unsigned NumRegionInstrs;
479  
480    SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
481                unsigned N) :
482      RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
483  };
484  } // end anonymous namespace
485  
486  using MBBRegionsVector = SmallVector<SchedRegion, 16>;
487  
488  static void
489  getSchedRegions(MachineBasicBlock *MBB,
490                  MBBRegionsVector &Regions,
491                  bool RegionsTopDown) {
492    MachineFunction *MF = MBB->getParent();
493    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
494  
495    MachineBasicBlock::iterator I = nullptr;
496    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
497        RegionEnd != MBB->begin(); RegionEnd = I) {
498  
499      // Avoid decrementing RegionEnd for blocks with no terminator.
500      if (RegionEnd != MBB->end() ||
501          isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
502        --RegionEnd;
503      }
504  
505      // The next region starts above the previous region. Look backward in the
506      // instruction stream until we find the nearest boundary.
507      unsigned NumRegionInstrs = 0;
508      I = RegionEnd;
509      for (;I != MBB->begin(); --I) {
510        MachineInstr &MI = *std::prev(I);
511        if (isSchedBoundary(&MI, &*MBB, MF, TII))
512          break;
513        if (!MI.isDebugOrPseudoInstr()) {
514          // MBB::size() uses instr_iterator to count. Here we need a bundle to
515          // count as a single instruction.
516          ++NumRegionInstrs;
517        }
518      }
519  
520      // It's possible we found a scheduling region that only has debug
521      // instructions. Don't bother scheduling these.
522      if (NumRegionInstrs != 0)
523        Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
524    }
525  
526    if (RegionsTopDown)
527      std::reverse(Regions.begin(), Regions.end());
528  }
529  
530  /// Main driver for both MachineScheduler and PostMachineScheduler.
531  void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
532                                             bool FixKillFlags) {
533    // Visit all machine basic blocks.
534    //
535    // TODO: Visit blocks in global postorder or postorder within the bottom-up
536    // loop tree. Then we can optionally compute global RegPressure.
537    for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
538         MBB != MBBEnd; ++MBB) {
539  
540      Scheduler.startBlock(&*MBB);
541  
542  #ifndef NDEBUG
543      if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
544        continue;
545      if (SchedOnlyBlock.getNumOccurrences()
546          && (int)SchedOnlyBlock != MBB->getNumber())
547        continue;
548  #endif
549  
550      // Break the block into scheduling regions [I, RegionEnd). RegionEnd
551      // points to the scheduling boundary at the bottom of the region. The DAG
552      // does not include RegionEnd, but the region does (i.e. the next
553      // RegionEnd is above the previous RegionBegin). If the current block has
554      // no terminator then RegionEnd == MBB->end() for the bottom region.
555      //
556      // All the regions of MBB are first found and stored in MBBRegions, which
557    // are processed top-down within the MBB if doMBBSchedRegionsTopDown() is true.
558      //
559      // The Scheduler may insert instructions during either schedule() or
560      // exitRegion(), even for empty regions. So the local iterators 'I' and
561      // 'RegionEnd' are invalid across these calls. Instructions must not be
562      // added to other regions than the current one without updating MBBRegions.
563  
564      MBBRegionsVector MBBRegions;
565      getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
566      for (const SchedRegion &R : MBBRegions) {
567        MachineBasicBlock::iterator I = R.RegionBegin;
568        MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
569        unsigned NumRegionInstrs = R.NumRegionInstrs;
570  
571        // Notify the scheduler of the region, even if we may skip scheduling
572        // it. Perhaps it still needs to be bundled.
573        Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
574  
575        // Skip empty scheduling regions (0 or 1 schedulable instructions).
576        if (I == RegionEnd || I == std::prev(RegionEnd)) {
577          // Close the current region. Bundle the terminator if needed.
578          // This invalidates 'RegionEnd' and 'I'.
579          Scheduler.exitRegion();
580          continue;
581        }
582        LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
583        LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
584                          << " " << MBB->getName() << "\n  From: " << *I
585                          << "    To: ";
586                   if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
587                   else dbgs() << "End\n";
588                   dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
589        if (DumpCriticalPathLength) {
590          errs() << MF->getName();
591          errs() << ":%bb. " << MBB->getNumber();
592          errs() << " " << MBB->getName() << " \n";
593        }
594  
595        // Schedule a region: possibly reorder instructions.
596        // This invalidates the original region iterators.
597        Scheduler.schedule();
598  
599        // Close the current region.
600        Scheduler.exitRegion();
601      }
602      Scheduler.finishBlock();
603      // FIXME: Ideally, no further passes should rely on kill flags. However,
604      // thumb2 size reduction is currently an exception, so the PostMIScheduler
605      // needs to do this.
606      if (FixKillFlags)
607        Scheduler.fixupKills(*MBB);
608    }
609    Scheduler.finalizeSchedule();
610  }
611  
612  void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
613    // unimplemented
614  }
615  
616  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
617  LLVM_DUMP_METHOD void ReadyQueue::dump() const {
618    dbgs() << "Queue " << Name << ": ";
619    for (const SUnit *SU : Queue)
620      dbgs() << SU->NodeNum << " ";
621    dbgs() << "\n";
622  }
623  #endif
624  
625  //===----------------------------------------------------------------------===//
626  // ScheduleDAGMI - Basic machine instruction scheduling. This is
627  // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
628  // virtual registers.
629  //===----------------------------------------------------------------------===//
630  
631  // Provide a vtable anchor.
632  ScheduleDAGMI::~ScheduleDAGMI() = default;
633  
634  /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
635  /// NumPredsLeft reaches zero, release the successor node.
636  ///
637  /// FIXME: Adjust SuccSU height based on MinLatency.
638  void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
639    SUnit *SuccSU = SuccEdge->getSUnit();
640  
641    if (SuccEdge->isWeak()) {
642      --SuccSU->WeakPredsLeft;
643      if (SuccEdge->isCluster())
644        NextClusterSucc = SuccSU;
645      return;
646    }
647  #ifndef NDEBUG
648    if (SuccSU->NumPredsLeft == 0) {
649      dbgs() << "*** Scheduling failed! ***\n";
650      dumpNode(*SuccSU);
651      dbgs() << " has been released too many times!\n";
652      llvm_unreachable(nullptr);
653    }
654  #endif
655    // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
656    // CurrCycle may have advanced since then.
657    if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
658      SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
659  
660    --SuccSU->NumPredsLeft;
661    if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
662      SchedImpl->releaseTopNode(SuccSU);
663  }
664  
665  /// releaseSuccessors - Call releaseSucc on each of SU's successors.
666  void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
667    for (SDep &Succ : SU->Succs)
668      releaseSucc(SU, &Succ);
669  }
670  
671  /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
672  /// NumSuccsLeft reaches zero, release the predecessor node.
673  ///
674  /// FIXME: Adjust PredSU height based on MinLatency.
675  void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
676    SUnit *PredSU = PredEdge->getSUnit();
677  
678    if (PredEdge->isWeak()) {
679      --PredSU->WeakSuccsLeft;
680      if (PredEdge->isCluster())
681        NextClusterPred = PredSU;
682      return;
683    }
684  #ifndef NDEBUG
685    if (PredSU->NumSuccsLeft == 0) {
686      dbgs() << "*** Scheduling failed! ***\n";
687      dumpNode(*PredSU);
688      dbgs() << " has been released too many times!\n";
689      llvm_unreachable(nullptr);
690    }
691  #endif
692    // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
693    // CurrCycle may have advanced since then.
694    if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
695      PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
696  
697    --PredSU->NumSuccsLeft;
698    if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
699      SchedImpl->releaseBottomNode(PredSU);
700  }
701  
702  /// releasePredecessors - Call releasePred on each of SU's predecessors.
703  void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
704    for (SDep &Pred : SU->Preds)
705      releasePred(SU, &Pred);
706  }
707  
708  void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
709    ScheduleDAGInstrs::startBlock(bb);
710    SchedImpl->enterMBB(bb);
711  }
712  
713  void ScheduleDAGMI::finishBlock() {
714    SchedImpl->leaveMBB();
715    ScheduleDAGInstrs::finishBlock();
716  }
717  
718  /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
719  /// crossing a scheduling boundary. [begin, end) includes all instructions in
720  /// the region, including the boundary itself and single-instruction regions
721  /// that don't get scheduled.
722  void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
723                                       MachineBasicBlock::iterator begin,
724                                       MachineBasicBlock::iterator end,
725                                       unsigned regioninstrs)
726  {
727    ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
728  
729    SchedImpl->initPolicy(begin, end, regioninstrs);
730  }
731  
732  /// This is normally called from the main scheduler loop but may also be invoked
733  /// by the scheduling strategy to perform additional code motion.
734  void ScheduleDAGMI::moveInstruction(
735    MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
736    // Advance RegionBegin if the first instruction moves down.
737    if (&*RegionBegin == MI)
738      ++RegionBegin;
739  
740    // Update the instruction stream.
741    BB->splice(InsertPos, BB, MI);
742  
743    // Update LiveIntervals
744    if (LIS)
745      LIS->handleMove(*MI, /*UpdateFlags=*/true);
746  
747    // Recede RegionBegin if an instruction moves above the first.
748    if (RegionBegin == InsertPos)
749      RegionBegin = MI;
750  }
751  
752  bool ScheduleDAGMI::checkSchedLimit() {
753  #if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
754    if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
755      CurrentTop = CurrentBottom;
756      return false;
757    }
758    ++NumInstrsScheduled;
759  #endif
760    return true;
761  }
762  
763  /// Per-region scheduling driver, called back from
764  /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
765  /// does not consider liveness or register pressure. It is useful for PostRA
766  /// scheduling and potentially other custom schedulers.
767  void ScheduleDAGMI::schedule() {
768    LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
769    LLVM_DEBUG(SchedImpl->dumpPolicy());
770  
771    // Build the DAG.
772    buildSchedGraph(AA);
773  
774    postprocessDAG();
775  
776    SmallVector<SUnit*, 8> TopRoots, BotRoots;
777    findRootsAndBiasEdges(TopRoots, BotRoots);
778  
779    LLVM_DEBUG(dump());
780    if (PrintDAGs) dump();
781    if (ViewMISchedDAGs) viewGraph();
782  
783    // Initialize the strategy before modifying the DAG.
784    // This may initialize a DFSResult to be used for queue priority.
785    SchedImpl->initialize(this);
786  
787    // Initialize ready queues now that the DAG and priority data are finalized.
788    initQueues(TopRoots, BotRoots);
789  
790    bool IsTopNode = false;
791    while (true) {
792      LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
793      SUnit *SU = SchedImpl->pickNode(IsTopNode);
794      if (!SU) break;
795  
796      assert(!SU->isScheduled && "Node already scheduled");
797      if (!checkSchedLimit())
798        break;
799  
800      MachineInstr *MI = SU->getInstr();
801      if (IsTopNode) {
802        assert(SU->isTopReady() && "node still has unscheduled dependencies");
803        if (&*CurrentTop == MI)
804          CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
805        else
806          moveInstruction(MI, CurrentTop);
807      } else {
808        assert(SU->isBottomReady() && "node still has unscheduled dependencies");
809        MachineBasicBlock::iterator priorII =
810          priorNonDebug(CurrentBottom, CurrentTop);
811        if (&*priorII == MI)
812          CurrentBottom = priorII;
813        else {
814          if (&*CurrentTop == MI)
815            CurrentTop = nextIfDebug(++CurrentTop, priorII);
816          moveInstruction(MI, CurrentBottom);
817          CurrentBottom = MI;
818        }
819      }
820      // Notify the scheduling strategy before updating the DAG.
821      // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
822      // runs, it can then use the accurate ReadyCycle time to determine whether
823      // newly released nodes can move to the readyQ.
824      SchedImpl->schedNode(SU, IsTopNode);
825  
826      updateQueues(SU, IsTopNode);
827    }
828    assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
829  
830    placeDebugValues();
831  
832    LLVM_DEBUG({
833      dbgs() << "*** Final schedule for "
834             << printMBBReference(*begin()->getParent()) << " ***\n";
835      dumpSchedule();
836      dbgs() << '\n';
837    });
838  }
839  
840  /// Apply each ScheduleDAGMutation step in order.
841  void ScheduleDAGMI::postprocessDAG() {
842    for (auto &m : Mutations)
843      m->apply(this);
844  }
845  
846  void ScheduleDAGMI::
847  findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
848                        SmallVectorImpl<SUnit*> &BotRoots) {
849    for (SUnit &SU : SUnits) {
850      assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
851  
852      // Order predecessors so DFSResult follows the critical path.
853      SU.biasCriticalPath();
854  
855      // A SUnit is ready to top schedule if it has no predecessors.
856      if (!SU.NumPredsLeft)
857        TopRoots.push_back(&SU);
858      // A SUnit is ready to bottom schedule if it has no successors.
859      if (!SU.NumSuccsLeft)
860        BotRoots.push_back(&SU);
861    }
862    ExitSU.biasCriticalPath();
863  }
864  
865  /// Identify DAG roots and setup scheduler queues.
866  void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
867                                 ArrayRef<SUnit*> BotRoots) {
868    NextClusterSucc = nullptr;
869    NextClusterPred = nullptr;
870  
871    // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
872    //
873    // Nodes with unreleased weak edges can still be roots.
874    // Release top roots in forward order.
875    for (SUnit *SU : TopRoots)
876      SchedImpl->releaseTopNode(SU);
877  
878    // Release bottom roots in reverse order so the higher priority nodes appear
879    // first. This is more natural and slightly more efficient.
880    for (SmallVectorImpl<SUnit*>::const_reverse_iterator
881           I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
882      SchedImpl->releaseBottomNode(*I);
883    }
884  
885    releaseSuccessors(&EntrySU);
886    releasePredecessors(&ExitSU);
887  
888    SchedImpl->registerRoots();
889  
890    // Advance past initial DebugValues.
891    CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
892    CurrentBottom = RegionEnd;
893  }
894  
895  /// Update scheduler queues after scheduling an instruction.
896  void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
897    // Release dependent instructions for scheduling.
898    if (IsTopNode)
899      releaseSuccessors(SU);
900    else
901      releasePredecessors(SU);
902  
903    SU->isScheduled = true;
904  }
905  
906  /// Reinsert any remaining debug_values, just like the PostRA scheduler.
907  void ScheduleDAGMI::placeDebugValues() {
908    // If first instruction was a DBG_VALUE then put it back.
909    if (FirstDbgValue) {
910      BB->splice(RegionBegin, BB, FirstDbgValue);
911      RegionBegin = FirstDbgValue;
912    }
913  
914    for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
915           DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
916      std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
917      MachineInstr *DbgValue = P.first;
918      MachineBasicBlock::iterator OrigPrevMI = P.second;
919      if (&*RegionBegin == DbgValue)
920        ++RegionBegin;
921      BB->splice(std::next(OrigPrevMI), BB, DbgValue);
922      if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
923        RegionEnd = DbgValue;
924    }
925  }
926  
927  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
928  LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
929    for (MachineInstr &MI : *this) {
930      if (SUnit *SU = getSUnit(&MI))
931        dumpNode(*SU);
932      else
933        dbgs() << "Missing SUnit\n";
934    }
935  }
936  #endif
937  
938  //===----------------------------------------------------------------------===//
939  // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
940  // preservation.
941  //===----------------------------------------------------------------------===//
942  
943  ScheduleDAGMILive::~ScheduleDAGMILive() {
944    delete DFSResult;
945  }
946  
947  void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
948    const MachineInstr &MI = *SU.getInstr();
949    for (const MachineOperand &MO : MI.operands()) {
950      if (!MO.isReg())
951        continue;
952      if (!MO.readsReg())
953        continue;
954      if (TrackLaneMasks && !MO.isUse())
955        continue;
956  
957      Register Reg = MO.getReg();
958      if (!Register::isVirtualRegister(Reg))
959        continue;
960  
961      // Ignore re-defs.
962      if (TrackLaneMasks) {
963        bool FoundDef = false;
964        for (const MachineOperand &MO2 : MI.operands()) {
965          if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
966            FoundDef = true;
967            break;
968          }
969        }
970        if (FoundDef)
971          continue;
972      }
973  
974      // Record this local VReg use.
975      VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
976      for (; UI != VRegUses.end(); ++UI) {
977        if (UI->SU == &SU)
978          break;
979      }
980      if (UI == VRegUses.end())
981        VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
982    }
983  }
984  
985  /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
986  /// crossing a scheduling boundary. [begin, end) includes all instructions in
987  /// the region, including the boundary itself and single-instruction regions
988  /// that don't get scheduled.
989  void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
990                                  MachineBasicBlock::iterator begin,
991                                  MachineBasicBlock::iterator end,
992                                  unsigned regioninstrs)
993  {
994    // ScheduleDAGMI initializes SchedImpl's per-region policy.
995    ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
996  
997    // For convenience remember the end of the liveness region.
998    LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
999  
1000    SUPressureDiffs.clear();
1001  
1002    ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1003    ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1004  
1005    assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
1006           "ShouldTrackLaneMasks requires ShouldTrackPressure");
1007  }
1008  
1009  // Setup the register pressure trackers for the top scheduled and bottom
1010  // scheduled regions.
1011  void ScheduleDAGMILive::initRegPressure() {
1012    VRegUses.clear();
1013    VRegUses.setUniverse(MRI.getNumVirtRegs());
1014    for (SUnit &SU : SUnits)
1015      collectVRegUses(SU);
1016  
1017    TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1018                      ShouldTrackLaneMasks, false);
1019    BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1020                      ShouldTrackLaneMasks, false);
1021  
1022    // Close the RPTracker to finalize live ins.
1023    RPTracker.closeRegion();
1024  
1025    LLVM_DEBUG(RPTracker.dump());
1026  
1027    // Initialize the live ins and live outs.
1028    TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1029    BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1030  
1031    // Close one end of the tracker so we can call
1032    // getMaxUpward/DownwardPressureDelta before advancing across any
1033    // instructions. This converts currently live regs into live ins/outs.
1034    TopRPTracker.closeTop();
1035    BotRPTracker.closeBottom();
1036  
1037    BotRPTracker.initLiveThru(RPTracker);
1038    if (!BotRPTracker.getLiveThru().empty()) {
1039      TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1040      LLVM_DEBUG(dbgs() << "Live Thru: ";
1041                 dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1042    }
1043  
1044    // For each live out vreg reduce the pressure change associated with other
1045    // uses of the same vreg below the live-out reaching def.
1046    updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1047  
1048    // Account for liveness generated by the region boundary.
1049    if (LiveRegionEnd != RegionEnd) {
1050      SmallVector<RegisterMaskPair, 8> LiveUses;
1051      BotRPTracker.recede(&LiveUses);
1052      updatePressureDiffs(LiveUses);
1053    }
1054  
1055    LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1056               dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1057               dbgs() << "Bottom Pressure:\n";
1058               dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1059  
1060    assert((BotRPTracker.getPos() == RegionEnd ||
1061            (RegionEnd->isDebugInstr() &&
1062             BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1063           "Can't find the region bottom");
1064  
1065    // Cache the list of excess pressure sets in this region. This will also track
1066    // the max pressure in the scheduled code for these sets.
1067    RegionCriticalPSets.clear();
1068    const std::vector<unsigned> &RegionPressure =
1069      RPTracker.getPressure().MaxSetPressure;
1070    for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1071      unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1072      if (RegionPressure[i] > Limit) {
1073        LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1074                          << " Actual " << RegionPressure[i] << "\n");
1075        RegionCriticalPSets.push_back(PressureChange(i));
1076      }
1077    }
1078    LLVM_DEBUG(dbgs() << "Excess PSets: ";
1079               for (const PressureChange &RCPS
1080                    : RegionCriticalPSets) dbgs()
1081               << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1082               dbgs() << "\n");
1083  }
1084  
1085  void ScheduleDAGMILive::
1086  updateScheduledPressure(const SUnit *SU,
1087                          const std::vector<unsigned> &NewMaxPressure) {
1088    const PressureDiff &PDiff = getPressureDiff(SU);
1089    unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1090    for (const PressureChange &PC : PDiff) {
1091      if (!PC.isValid())
1092        break;
1093      unsigned ID = PC.getPSet();
1094      while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1095        ++CritIdx;
1096      if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1097        if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1098            && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1099          RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1100      }
1101      unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1102      if (NewMaxPressure[ID] >= Limit - 2) {
1103        LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
1104                          << NewMaxPressure[ID]
1105                          << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1106                          << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1107                          << " livethru)\n");
1108      }
1109    }
1110  }
1111  
1112  /// Update the PressureDiff array for liveness after scheduling this
1113  /// instruction.
1114  void ScheduleDAGMILive::updatePressureDiffs(
1115      ArrayRef<RegisterMaskPair> LiveUses) {
1116    for (const RegisterMaskPair &P : LiveUses) {
1117      Register Reg = P.RegUnit;
1118      /// FIXME: Currently assuming single-use physregs.
1119      if (!Register::isVirtualRegister(Reg))
1120        continue;
1121  
1122      if (ShouldTrackLaneMasks) {
1123        // If the register has just become live then other uses won't change
1124        // this fact anymore => decrement pressure.
1125        // If the register has just become dead then other uses make it come
1126        // back to life => increment pressure.
1127        bool Decrement = P.LaneMask.any();
1128  
1129        for (const VReg2SUnit &V2SU
1130             : make_range(VRegUses.find(Reg), VRegUses.end())) {
1131          SUnit &SU = *V2SU.SU;
1132          if (SU.isScheduled || &SU == &ExitSU)
1133            continue;
1134  
1135          PressureDiff &PDiff = getPressureDiff(&SU);
1136          PDiff.addPressureChange(Reg, Decrement, &MRI);
1137          LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
1138                            << printReg(Reg, TRI) << ':'
1139                            << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1140                     dbgs() << "              to "; PDiff.dump(*TRI););
1141        }
1142      } else {
1143        assert(P.LaneMask.any());
1144        LLVM_DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1145        // This may be called before CurrentBottom has been initialized. However,
1146        // BotRPTracker must have a valid position. We want the value live into the
1147        // instruction or live out of the block, so ask for the previous
1148        // instruction's live-out.
1149        const LiveInterval &LI = LIS->getInterval(Reg);
1150        VNInfo *VNI;
1151        MachineBasicBlock::const_iterator I =
1152          nextIfDebug(BotRPTracker.getPos(), BB->end());
1153        if (I == BB->end())
1154          VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1155        else {
1156          LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1157          VNI = LRQ.valueIn();
1158        }
1159        // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1160        assert(VNI && "No live value at use.");
1161        for (const VReg2SUnit &V2SU
1162             : make_range(VRegUses.find(Reg), VRegUses.end())) {
1163          SUnit *SU = V2SU.SU;
1164          // If this use comes before the reaching def, it cannot be a last use,
1165          // so decrease its pressure change.
1166          if (!SU->isScheduled && SU != &ExitSU) {
1167            LiveQueryResult LRQ =
1168                LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1169            if (LRQ.valueIn() == VNI) {
1170              PressureDiff &PDiff = getPressureDiff(SU);
1171              PDiff.addPressureChange(Reg, true, &MRI);
1172              LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
1173                                << *SU->getInstr();
1174                         dbgs() << "              to "; PDiff.dump(*TRI););
1175            }
1176          }
1177        }
1178      }
1179    }
1180  }
1181  
1182  void ScheduleDAGMILive::dump() const {
1183  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1184    if (EntrySU.getInstr() != nullptr)
1185      dumpNodeAll(EntrySU);
1186    for (const SUnit &SU : SUnits) {
1187      dumpNodeAll(SU);
1188      if (ShouldTrackPressure) {
1189        dbgs() << "  Pressure Diff      : ";
1190        getPressureDiff(&SU).dump(*TRI);
1191      }
1192      dbgs() << "  Single Issue       : ";
1193      if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1194          SchedModel.mustEndGroup(SU.getInstr()))
1195        dbgs() << "true;";
1196      else
1197        dbgs() << "false;";
1198      dbgs() << '\n';
1199    }
1200    if (ExitSU.getInstr() != nullptr)
1201      dumpNodeAll(ExitSU);
1202  #endif
1203  }
1204  
1205  /// schedule - Called back from MachineScheduler::runOnMachineFunction
1206  /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1207  /// only includes instructions that have DAG nodes, not scheduling boundaries.
1208  ///
1209  /// This is a skeletal driver, with all the functionality pushed into helpers,
1210  /// so that it can be easily extended by experimental schedulers. Generally,
1211  /// implementing MachineSchedStrategy should be sufficient to implement a new
1212  /// scheduling algorithm. However, if a scheduler further subclasses
1213  /// ScheduleDAGMILive then it will want to override this virtual method in order
1214  /// to update any specialized state.
1215  void ScheduleDAGMILive::schedule() {
1216    LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1217    LLVM_DEBUG(SchedImpl->dumpPolicy());
1218    buildDAGWithRegPressure();
1219  
1220    postprocessDAG();
1221  
1222    SmallVector<SUnit*, 8> TopRoots, BotRoots;
1223    findRootsAndBiasEdges(TopRoots, BotRoots);
1224  
1225    // Initialize the strategy before modifying the DAG.
1226    // This may initialize a DFSResult to be used for queue priority.
1227    SchedImpl->initialize(this);
1228  
1229    LLVM_DEBUG(dump());
1230    if (PrintDAGs) dump();
1231    if (ViewMISchedDAGs) viewGraph();
1232  
1233    // Initialize ready queues now that the DAG and priority data are finalized.
1234    initQueues(TopRoots, BotRoots);
1235  
1236    bool IsTopNode = false;
1237    while (true) {
1238      LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1239      SUnit *SU = SchedImpl->pickNode(IsTopNode);
1240      if (!SU) break;
1241  
1242      assert(!SU->isScheduled && "Node already scheduled");
1243      if (!checkSchedLimit())
1244        break;
1245  
1246      scheduleMI(SU, IsTopNode);
1247  
1248      if (DFSResult) {
1249        unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1250        if (!ScheduledTrees.test(SubtreeID)) {
1251          ScheduledTrees.set(SubtreeID);
1252          DFSResult->scheduleTree(SubtreeID);
1253          SchedImpl->scheduleTree(SubtreeID);
1254        }
1255      }
1256  
1257      // Notify the scheduling strategy after updating the DAG.
1258      SchedImpl->schedNode(SU, IsTopNode);
1259  
1260      updateQueues(SU, IsTopNode);
1261    }
1262    assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1263  
1264    placeDebugValues();
1265  
1266    LLVM_DEBUG({
1267      dbgs() << "*** Final schedule for "
1268             << printMBBReference(*begin()->getParent()) << " ***\n";
1269      dumpSchedule();
1270      dbgs() << '\n';
1271    });
1272  }
1273  
1274  /// Build the DAG and setup three register pressure trackers.
1275  void ScheduleDAGMILive::buildDAGWithRegPressure() {
1276    if (!ShouldTrackPressure) {
1277      RPTracker.reset();
1278      RegionCriticalPSets.clear();
1279      buildSchedGraph(AA);
1280      return;
1281    }
1282  
1283    // Initialize the register pressure tracker used by buildSchedGraph.
1284    RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1285                   ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1286  
1287    // Account for liveness generated by the region boundary.
1288    if (LiveRegionEnd != RegionEnd)
1289      RPTracker.recede();
1290  
1291    // Build the DAG, and compute current register pressure.
1292    buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1293  
1294    // Initialize top/bottom trackers after computing region pressure.
1295    initRegPressure();
1296  }
1297  
1298  void ScheduleDAGMILive::computeDFSResult() {
1299    if (!DFSResult)
1300      DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1301    DFSResult->clear();
1302    ScheduledTrees.clear();
1303    DFSResult->resize(SUnits.size());
1304    DFSResult->compute(SUnits);
1305    ScheduledTrees.resize(DFSResult->getNumSubtrees());
1306  }
1307  
1308  /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1309  /// only provides the critical path for single block loops. To handle loops that
1310  /// span blocks, we could use the vreg path latencies provided by
1311  /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1312  /// available for use in the scheduler.
1313  ///
1314  /// The cyclic path estimation identifies a def-use pair that crosses the back
1315  /// edge and considers the depth and height of the nodes. For example, consider
1316  /// the following instruction sequence where each instruction has unit latency
1317  /// and defines an eponymous virtual register:
1318  ///
1319  /// a->b(a,c)->c(b)->d(c)->exit
1320  ///
1321  /// The cyclic critical path is two cycles: b->c->b
1322  /// The acyclic critical path is four cycles: a->b->c->d->exit
1323  /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1324  /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1325  /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1326  /// LiveInDepth = depth(b) = len(a->b) = 1
1327  ///
1328  /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1329  /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1330  /// CyclicCriticalPath = min(2, 2) = 2
1331  ///
1332  /// This could be relevant to PostRA scheduling, but is currently implemented
1333  /// assuming LiveIntervals.
1334  unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1335    // This only applies to single block loops.
1336    if (!BB->isSuccessor(BB))
1337      return 0;
1338  
1339    unsigned MaxCyclicLatency = 0;
1340    // Visit each live out vreg def to find def/use pairs that cross iterations.
1341    for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1342      Register Reg = P.RegUnit;
1343      if (!Register::isVirtualRegister(Reg))
1344        continue;
1345      const LiveInterval &LI = LIS->getInterval(Reg);
1346      const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1347      if (!DefVNI)
1348        continue;
1349  
1350      MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1351      const SUnit *DefSU = getSUnit(DefMI);
1352      if (!DefSU)
1353        continue;
1354  
1355      unsigned LiveOutHeight = DefSU->getHeight();
1356      unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1357      // Visit all local users of the vreg def.
1358      for (const VReg2SUnit &V2SU
1359           : make_range(VRegUses.find(Reg), VRegUses.end())) {
1360        SUnit *SU = V2SU.SU;
1361        if (SU == &ExitSU)
1362          continue;
1363  
1364        // Only consider uses of the phi.
1365        LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1366        if (!LRQ.valueIn()->isPHIDef())
1367          continue;
1368  
1369        // Assume that a path spanning two iterations is a cycle, which could
1370        // overestimate in strange cases. This allows cyclic latency to be
1371        // estimated as the minimum slack of the vreg's depth or height.
1372        unsigned CyclicLatency = 0;
1373        if (LiveOutDepth > SU->getDepth())
1374          CyclicLatency = LiveOutDepth - SU->getDepth();
1375  
1376        unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1377        if (LiveInHeight > LiveOutHeight) {
1378          if (LiveInHeight - LiveOutHeight < CyclicLatency)
1379            CyclicLatency = LiveInHeight - LiveOutHeight;
1380        } else
1381          CyclicLatency = 0;
1382  
1383        LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1384                          << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1385        if (CyclicLatency > MaxCyclicLatency)
1386          MaxCyclicLatency = CyclicLatency;
1387      }
1388    }
1389    LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1390    return MaxCyclicLatency;
1391  }
1392  
1393  /// Release ExitSU predecessors and set up scheduler queues. Re-position
1394  /// the Top RP tracker in case the region beginning has changed.
1395  void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1396                                     ArrayRef<SUnit*> BotRoots) {
1397    ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1398    if (ShouldTrackPressure) {
1399      assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1400      TopRPTracker.setPos(CurrentTop);
1401    }
1402  }
1403  
1404  /// Move an instruction and update register pressure.
1405  void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1406    // Move the instruction to its new location in the instruction stream.
1407    MachineInstr *MI = SU->getInstr();
1408  
1409    if (IsTopNode) {
1410      assert(SU->isTopReady() && "node still has unscheduled dependencies");
1411      if (&*CurrentTop == MI)
1412        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1413      else {
1414        moveInstruction(MI, CurrentTop);
1415        TopRPTracker.setPos(MI);
1416      }
1417  
1418      if (ShouldTrackPressure) {
1419        // Update top scheduled pressure.
1420        RegisterOperands RegOpers;
1421        RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1422        if (ShouldTrackLaneMasks) {
1423          // Adjust liveness and add missing dead+read-undef flags.
1424          SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1425          RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1426        } else {
1427          // Adjust for missing dead-def flags.
1428          RegOpers.detectDeadDefs(*MI, *LIS);
1429        }
1430  
1431        TopRPTracker.advance(RegOpers);
1432        assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1433        LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1434                       TopRPTracker.getRegSetPressureAtPos(), TRI););
1435  
1436        updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1437      }
1438    } else {
1439      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1440      MachineBasicBlock::iterator priorII =
1441        priorNonDebug(CurrentBottom, CurrentTop);
1442      if (&*priorII == MI)
1443        CurrentBottom = priorII;
1444      else {
1445        if (&*CurrentTop == MI) {
1446          CurrentTop = nextIfDebug(++CurrentTop, priorII);
1447          TopRPTracker.setPos(CurrentTop);
1448        }
1449        moveInstruction(MI, CurrentBottom);
1450        CurrentBottom = MI;
1451        BotRPTracker.setPos(CurrentBottom);
1452      }
1453      if (ShouldTrackPressure) {
1454        RegisterOperands RegOpers;
1455        RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1456        if (ShouldTrackLaneMasks) {
1457          // Adjust liveness and add missing dead+read-undef flags.
1458          SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1459          RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1460        } else {
1461          // Adjust for missing dead-def flags.
1462          RegOpers.detectDeadDefs(*MI, *LIS);
1463        }
1464  
1465        if (BotRPTracker.getPos() != CurrentBottom)
1466          BotRPTracker.recedeSkipDebugValues();
1467        SmallVector<RegisterMaskPair, 8> LiveUses;
1468        BotRPTracker.recede(RegOpers, &LiveUses);
1469        assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1470        LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1471                       BotRPTracker.getRegSetPressureAtPos(), TRI););
1472  
1473        updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1474        updatePressureDiffs(LiveUses);
1475      }
1476    }
1477  }
1478  
1479  //===----------------------------------------------------------------------===//
1480  // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1481  //===----------------------------------------------------------------------===//
1482  
1483  namespace {
1484  
1485  /// Post-process the DAG to create cluster edges between neighboring
1486  /// loads or between neighboring stores.
1487  class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1488    struct MemOpInfo {
1489      SUnit *SU;
1490      SmallVector<const MachineOperand *, 4> BaseOps;
1491      int64_t Offset;
1492      unsigned Width;
1493  
1494      MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1495                int64_t Offset, unsigned Width)
1496          : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1497            Width(Width) {}
1498  
1499      static bool Compare(const MachineOperand *const &A,
1500                          const MachineOperand *const &B) {
1501        if (A->getType() != B->getType())
1502          return A->getType() < B->getType();
1503        if (A->isReg())
1504          return A->getReg() < B->getReg();
1505        if (A->isFI()) {
1506          const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1507          const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1508          bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1509                                TargetFrameLowering::StackGrowsDown;
1510          return StackGrowsDown ? A->getIndex() > B->getIndex()
1511                                : A->getIndex() < B->getIndex();
1512        }
1513  
1514        llvm_unreachable("MemOpClusterMutation only supports register or frame "
1515                         "index bases.");
1516      }
1517  
1518      bool operator<(const MemOpInfo &RHS) const {
1519        // FIXME: Don't compare everything twice. Maybe use C++20 three way
1520        // comparison instead when it's available.
1521        if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1522                                         RHS.BaseOps.begin(), RHS.BaseOps.end(),
1523                                         Compare))
1524          return true;
1525        if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1526                                         BaseOps.begin(), BaseOps.end(), Compare))
1527          return false;
1528        if (Offset != RHS.Offset)
1529          return Offset < RHS.Offset;
1530        return SU->NodeNum < RHS.SU->NodeNum;
1531      }
1532    };
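
        // Illustrative ordering example (hypothetical operands): for two loads
        // from the same virtual base register at offsets 0 and 8, the base
        // operands compare equal, so operator< falls through to the offsets and
        // the offset-0 record sorts first. This lets clusterNeighboringMemOps
        // visit records in address order after llvm::sort.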
1533  
1534    const TargetInstrInfo *TII;
1535    const TargetRegisterInfo *TRI;
1536    bool IsLoad;
1537  
1538  public:
1539    BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1540                             const TargetRegisterInfo *tri, bool IsLoad)
1541        : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1542  
1543    void apply(ScheduleDAGInstrs *DAGInstrs) override;
1544  
1545  protected:
1546    void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1547                                  ScheduleDAGInstrs *DAG);
1548    void collectMemOpRecords(std::vector<SUnit> &SUnits,
1549                             SmallVectorImpl<MemOpInfo> &MemOpRecords);
1550    bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1551                     DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
1552  };
1553  
1554  class StoreClusterMutation : public BaseMemOpClusterMutation {
1555  public:
1556    StoreClusterMutation(const TargetInstrInfo *tii,
1557                         const TargetRegisterInfo *tri)
1558        : BaseMemOpClusterMutation(tii, tri, false) {}
1559  };
1560  
1561  class LoadClusterMutation : public BaseMemOpClusterMutation {
1562  public:
1563    LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1564        : BaseMemOpClusterMutation(tii, tri, true) {}
1565  };
1566  
1567  } // end anonymous namespace
1568  
1569  namespace llvm {
1570  
1571  std::unique_ptr<ScheduleDAGMutation>
1572  createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1573                               const TargetRegisterInfo *TRI) {
1574    return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
1575                              : nullptr;
1576  }
1577  
1578  std::unique_ptr<ScheduleDAGMutation>
1579  createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1580                                const TargetRegisterInfo *TRI) {
1581    return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
1582                              : nullptr;
1583  }
1584  
1585  } // end namespace llvm
1586  
1587  // Sort all the loads/stores first. Then, for each load/store, check the
1588  // following loads/stores one by one until reaching the first non-dependent
1589  // one, and call the target hook to see if they can be clustered.
1590  // If FastCluster is enabled, we assume that all the loads/stores have been
1591  // preprocessed and no longer have dependencies on each other.
1592  void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1593      ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1594      ScheduleDAGInstrs *DAG) {
1595    // Keep track of the current cluster length and bytes for each SUnit.
1596    DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
1597  
1598    // At this point, the `MemOpRecords` array must hold at least two mem ops.
1599    // Try to cluster the mem ops collected within the `MemOpRecords` array.
1600    for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1601      // The decision to cluster mem ops is based on target-dependent logic.
1602      auto MemOpa = MemOpRecords[Idx];
1603  
1604      // Seek the next load/store with which to form a cluster.
1605      unsigned NextIdx = Idx + 1;
1606      for (; NextIdx < End; ++NextIdx)
1607        // Skip if MemOpb has already been clustered or has a dependency on
1608        // MemOpa.
1609        if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1610            (FastCluster ||
1611             (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1612              !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1613          break;
1614      if (NextIdx == End)
1615        continue;
1616  
1617      auto MemOpb = MemOpRecords[NextIdx];
1618      unsigned ClusterLength = 2;
1619      unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
1620      if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1621        ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1622        CurrentClusterBytes =
1623            SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
1624      }
1625  
1626      if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
1627                                    CurrentClusterBytes))
1628        continue;
1629  
1630      SUnit *SUa = MemOpa.SU;
1631      SUnit *SUb = MemOpb.SU;
1632      if (SUa->NodeNum > SUb->NodeNum)
1633        std::swap(SUa, SUb);
1634  
1635      // FIXME: Is this check really required?
1636      if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1637        continue;
1638  
1639      LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1640                        << SUb->NodeNum << ")\n");
1641      ++NumClustered;
1642  
1643      if (IsLoad) {
1644        // Copy successor edges from SUa to SUb. Interleaving computation
1645        // dependent on SUa can prevent load combining due to register reuse.
1646        // Predecessor edges do not need to be copied from SUb to SUa since
1647        // nearby loads should have effectively the same inputs.
1648        for (const SDep &Succ : SUa->Succs) {
1649          if (Succ.getSUnit() == SUb)
1650            continue;
1651          LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
1652                            << ")\n");
1653          DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1654        }
1655      } else {
1656      // Copy predecessor edges from SUb to SUa so that the SUnits SUb depends
1657      // on cannot be scheduled in between SUb and SUa. Successor edges do not
1658      // need to be copied from SUa to SUb since nothing will depend on stores.
1659      // Note that we don't need to worry about memory dependencies, as we won't
1660      // try to cluster mem ops that have any memory dependency.
1661        // we won't try to cluster them if they have any memory dependency.
1662        for (const SDep &Pred : SUb->Preds) {
1663          if (Pred.getSUnit() == SUa)
1664            continue;
1665          LLVM_DEBUG(dbgs() << "  Copy Pred SU(" << Pred.getSUnit()->NodeNum
1666                            << ")\n");
1667          DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1668        }
1669      }
1670  
1671      SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1672                                               CurrentClusterBytes};
1673  
1674      LLVM_DEBUG(dbgs() << "  Curr cluster length: " << ClusterLength
1675                        << ", Curr cluster bytes: " << CurrentClusterBytes
1676                        << "\n");
1677    }
1678  }
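
      // Worked example (hypothetical SUnits): suppose MemOpRecords holds three
      // loads from the same base register at offsets 0, 4 and 8, each 4 bytes
      // wide, with no DAG dependencies between them. The first iteration pairs
      // the offset-0 and offset-4 loads with ClusterLength=2 and
      // CurrentClusterBytes=8. The second iteration finds the offset-4 load
      // already recorded in SUnit2ClusterInfo and extends the cluster to the
      // offset-8 load with ClusterLength=3 and CurrentClusterBytes=12, provided
      // the target's shouldClusterMemOps hook accepts each step.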
1679  
1680  void BaseMemOpClusterMutation::collectMemOpRecords(
1681      std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1682    for (auto &SU : SUnits) {
1683      if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1684          (!IsLoad && !SU.getInstr()->mayStore()))
1685        continue;
1686  
1687      const MachineInstr &MI = *SU.getInstr();
1688      SmallVector<const MachineOperand *, 4> BaseOps;
1689      int64_t Offset;
1690      bool OffsetIsScalable;
1691      unsigned Width;
1692      if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1693                                             OffsetIsScalable, Width, TRI)) {
1694        MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
1695  
1696        LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1697                          << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1698                          << ", Width: " << Width << "\n");
1699      }
1700  #ifndef NDEBUG
1701      for (const auto *Op : BaseOps)
1702        assert(Op);
1703  #endif
1704    }
1705  }
1706  
1707  bool BaseMemOpClusterMutation::groupMemOps(
1708      ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1709      DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
1710    bool FastCluster =
1711        ForceFastCluster ||
1712        MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1713  
1714    for (const auto &MemOp : MemOps) {
1715      unsigned ChainPredID = DAG->SUnits.size();
1716      if (FastCluster) {
1717        for (const SDep &Pred : MemOp.SU->Preds) {
1718        // We only want to cluster mem ops that have the same ctrl (non-data)
1719        // pred, so that they have no ctrl dependency on each other. For store
1720        // instrs, we can still cluster them if the pred is a load instr.
1721          if ((Pred.isCtrl() &&
1722               (IsLoad ||
1723                (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1724              !Pred.isArtificial()) {
1725            ChainPredID = Pred.getSUnit()->NodeNum;
1726            break;
1727          }
1728        }
1729      } else
1730        ChainPredID = 0;
1731  
1732      Groups[ChainPredID].push_back(MemOp);
1733    }
1734    return FastCluster;
1735  }
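
      // For example (illustrative numbers only): with 100 collected mem ops in
      // a DAG of 600 SUnits, MemOps.size() * DAG->SUnits.size() / 1000 equals
      // 60, so fast clustering kicks in whenever FastClusterThreshold is below
      // 60. In that mode the mem ops are bucketed by a ctrl predecessor here,
      // and the pairwise DAG->IsReachable checks are skipped later in
      // clusterNeighboringMemOps.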
1736  
1737  /// Callback from DAG postProcessing to create cluster edges for loads/stores.
1738  void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1739    // Collect all the clusterable loads/stores
1740    SmallVector<MemOpInfo, 32> MemOpRecords;
1741    collectMemOpRecords(DAG->SUnits, MemOpRecords);
1742  
1743    if (MemOpRecords.size() < 2)
1744      return;
1745  
1746    // If the DAG is too complex, use a heuristic to put the loads/stores
1747    // without dependencies into the same group, to avoid a compile-time blowup.
1748    // Note that some fusion pairs could be lost with this.
1749    DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
1750    bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
1751  
1752    for (auto &Group : Groups) {
1753      // Sort the loads/stores so that we can stop clustering as early as
1754      // possible.
1755      llvm::sort(Group.second);
1756  
1757      // Try to cluster all the neighboring loads/stores.
1758      clusterNeighboringMemOps(Group.second, FastCluster, DAG);
1759    }
1760  }
1761  
1762  //===----------------------------------------------------------------------===//
1763  // CopyConstrain - DAG post-processing to encourage copy elimination.
1764  //===----------------------------------------------------------------------===//
1765  
1766  namespace {
1767  
1768  /// Post-process the DAG to create weak edges from all uses of a copy to
1769  /// the one use that defines the copy's source vreg, most likely an induction
1770  /// variable increment.
1771  class CopyConstrain : public ScheduleDAGMutation {
1772    // Transient state.
1773    SlotIndex RegionBeginIdx;
1774  
1775    // RegionEndIdx is the slot index of the last non-debug instruction in the
1776    // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1777    SlotIndex RegionEndIdx;
1778  
1779  public:
1780    CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1781  
1782    void apply(ScheduleDAGInstrs *DAGInstrs) override;
1783  
1784  protected:
1785    void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1786  };
1787  
1788  } // end anonymous namespace
1789  
1790  namespace llvm {
1791  
1792  std::unique_ptr<ScheduleDAGMutation>
1793  createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1794                                 const TargetRegisterInfo *TRI) {
1795    return std::make_unique<CopyConstrain>(TII, TRI);
1796  }
1797  
1798  } // end namespace llvm
1799  
1800  /// constrainLocalCopy handles two possibilities:
1801  /// 1) Local src:
1802  /// I0:     = dst
1803  /// I1: src = ...
1804  /// I2:     = dst
1805  /// I3: dst = src (copy)
1806  /// (create pred->succ edges I0->I1, I2->I1)
1807  ///
1808  /// 2) Local copy:
1809  /// I0: dst = src (copy)
1810  /// I1:     = dst
1811  /// I2: src = ...
1812  /// I3:     = dst
1813  /// (create pred->succ edges I1->I2, I3->I2)
1814  ///
1815  /// Although the MachineScheduler is currently constrained to single blocks,
1816  /// this algorithm should handle extended blocks. An EBB is a set of
1817  /// contiguously numbered blocks such that the previous block in the EBB is
1818  /// always the single predecessor.
1819  void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1820    LiveIntervals *LIS = DAG->getLIS();
1821    MachineInstr *Copy = CopySU->getInstr();
1822  
1823    // Check for pure vreg copies.
1824    const MachineOperand &SrcOp = Copy->getOperand(1);
1825    Register SrcReg = SrcOp.getReg();
1826    if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1827      return;
1828  
1829    const MachineOperand &DstOp = Copy->getOperand(0);
1830    Register DstReg = DstOp.getReg();
1831    if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
1832      return;
1833  
1834    // Check if either the dest or source is local. If it's live across a back
1835    // edge, it's not local. Note that if both vregs are live across the back
1836    // edge, we cannot successfully constrain the copy without cyclic scheduling.
1837    // If both the copy's source and dest are local live intervals, then we
1838    // should treat the dest as the global for the purpose of adding
1839    // constraints. This adds edges from source's other uses to the copy.
1840    unsigned LocalReg = SrcReg;
1841    unsigned GlobalReg = DstReg;
1842    LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1843    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1844      LocalReg = DstReg;
1845      GlobalReg = SrcReg;
1846      LocalLI = &LIS->getInterval(LocalReg);
1847      if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1848        return;
1849    }
1850    LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1851  
1852    // Find the global segment after the start of the local LI.
1853    LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1854    // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1855    // local live range. We could create edges from other global uses to the local
1856    // start, but the coalescer should have already eliminated these cases, so
1857    // don't bother dealing with them.
1858    if (GlobalSegment == GlobalLI->end())
1859      return;
1860  
1861    // If GlobalSegment is killed at the LocalLI->start, the call to find()
1862    // returned the next global segment. But if GlobalSegment overlaps with
1863    // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1864    // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1865    if (GlobalSegment->contains(LocalLI->beginIndex()))
1866      ++GlobalSegment;
1867  
1868    if (GlobalSegment == GlobalLI->end())
1869      return;
1870  
1871    // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1872    if (GlobalSegment != GlobalLI->begin()) {
1873      // Two address defs have no hole.
1874      if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1875                                 GlobalSegment->start)) {
1876        return;
1877      }
1878      // If the prior global segment may be defined by the same two-address
1879      // instruction that also defines LocalLI, then we cannot make a hole here.
1880      if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1881                                 LocalLI->beginIndex())) {
1882        return;
1883      }
1884      // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1885      // it would be a disconnected component in the live range.
1886      assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1887             "Disconnected LRG within the scheduling region.");
1888    }
1889    MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1890    if (!GlobalDef)
1891      return;
1892  
1893    SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1894    if (!GlobalSU)
1895      return;
1896  
1897    // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1898    // constraining the uses of the last local def to precede GlobalDef.
1899    SmallVector<SUnit*,8> LocalUses;
1900    const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1901    MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1902    SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1903    for (const SDep &Succ : LastLocalSU->Succs) {
1904      if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1905        continue;
1906      if (Succ.getSUnit() == GlobalSU)
1907        continue;
1908      if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1909        return;
1910      LocalUses.push_back(Succ.getSUnit());
1911    }
1912    // Open the top of the GlobalLI hole by constraining any earlier global uses
1913    // to precede the start of LocalLI.
1914    SmallVector<SUnit*,8> GlobalUses;
1915    MachineInstr *FirstLocalDef =
1916      LIS->getInstructionFromIndex(LocalLI->beginIndex());
1917    SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1918    for (const SDep &Pred : GlobalSU->Preds) {
1919      if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1920        continue;
1921      if (Pred.getSUnit() == FirstLocalSU)
1922        continue;
1923      if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1924        return;
1925      GlobalUses.push_back(Pred.getSUnit());
1926    }
1927    LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1928    // Add the weak edges.
1929    for (SUnit *LU : LocalUses) {
1930      LLVM_DEBUG(dbgs() << "  Local use SU(" << LU->NodeNum << ") -> SU("
1931                        << GlobalSU->NodeNum << ")\n");
1932      DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
1933    }
1934    for (SUnit *GU : GlobalUses) {
1935      LLVM_DEBUG(dbgs() << "  Global use SU(" << GU->NodeNum << ") -> SU("
1936                        << FirstLocalSU->NodeNum << ")\n");
1937      DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
1938    }
1939  }
1940  
1941  /// Callback from DAG postProcessing to create weak edges to encourage
1942  /// copy elimination.
1943  void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1944    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1945    assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1946  
1947    MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1948    if (FirstPos == DAG->end())
1949      return;
1950    RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1951    RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1952        *priorNonDebug(DAG->end(), DAG->begin()));
1953  
1954    for (SUnit &SU : DAG->SUnits) {
1955      if (!SU.getInstr()->isCopy())
1956        continue;
1957  
1958      constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1959    }
1960  }
1961  
1962  //===----------------------------------------------------------------------===//
1963  // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1964  // and possibly other custom schedulers.
1965  //===----------------------------------------------------------------------===//
1966  
1967  static const unsigned InvalidCycle = ~0U;
1968  
1969  SchedBoundary::~SchedBoundary() { delete HazardRec; }
1970  
1971  /// Given a Count of resource usage and a Latency value, return true if a
1972  /// SchedBoundary becomes resource limited.
1973  /// If we are checking after scheduling a node, we should return true when
1974  /// we just reach the resource limit.
1975  static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1976                                 unsigned Latency, bool AfterSchedNode) {
1977    int ResCntFactor = (int)(Count - (Latency * LFactor));
1978    if (AfterSchedNode)
1979      return ResCntFactor >= (int)LFactor;
1980    else
1981      return ResCntFactor > (int)LFactor;
1982  }
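
      // For example (illustrative numbers only): with LFactor=2, Count=10 and
      // Latency=4, ResCntFactor is 10 - 4*2 = 2. When checking after scheduling
      // a node, 2 >= 2 holds and the zone counts as resource limited; when
      // checking beforehand, the strict 2 > 2 comparison fails and it does not.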
1983  
1984  void SchedBoundary::reset() {
1985    // A new HazardRec is created for each DAG and owned by SchedBoundary.
1986    // Destroying and reconstructing it is very expensive though. So keep
1987    // invalid, placeholder HazardRecs.
1988    if (HazardRec && HazardRec->isEnabled()) {
1989      delete HazardRec;
1990      HazardRec = nullptr;
1991    }
1992    Available.clear();
1993    Pending.clear();
1994    CheckPending = false;
1995    CurrCycle = 0;
1996    CurrMOps = 0;
1997    MinReadyCycle = std::numeric_limits<unsigned>::max();
1998    ExpectedLatency = 0;
1999    DependentLatency = 0;
2000    RetiredMOps = 0;
2001    MaxExecutedResCount = 0;
2002    ZoneCritResIdx = 0;
2003    IsResourceLimited = false;
2004    ReservedCycles.clear();
2005    ReservedCyclesIndex.clear();
2006    ResourceGroupSubUnitMasks.clear();
2007  #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2008    // Track the maximum number of stall cycles that could arise either from the
2009    // latency of a DAG edge or the number of cycles that a processor resource is
2010    // reserved (SchedBoundary::ReservedCycles).
2011    MaxObservedStall = 0;
2012  #endif
2013    // Reserve a zero-count for invalid CritResIdx.
2014    ExecutedResCounts.resize(1);
2015    assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2016  }
2017  
2018  void SchedRemainder::
2019  init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2020    reset();
2021    if (!SchedModel->hasInstrSchedModel())
2022      return;
2023    RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
2024    for (SUnit &SU : DAG->SUnits) {
2025      const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2026      RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2027        * SchedModel->getMicroOpFactor();
2028      for (TargetSchedModel::ProcResIter
2029             PI = SchedModel->getWriteProcResBegin(SC),
2030             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2031        unsigned PIdx = PI->ProcResourceIdx;
2032        unsigned Factor = SchedModel->getResourceFactor(PIdx);
2033        RemainingCounts[PIdx] += (Factor * PI->Cycles);
2034      }
2035    }
2036  }
2037  
2038  void SchedBoundary::
2039  init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2040    reset();
2041    DAG = dag;
2042    SchedModel = smodel;
2043    Rem = rem;
2044    if (SchedModel->hasInstrSchedModel()) {
2045      unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2046      ReservedCyclesIndex.resize(ResourceCount);
2047      ExecutedResCounts.resize(ResourceCount);
2048      ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2049      unsigned NumUnits = 0;
2050  
2051      for (unsigned i = 0; i < ResourceCount; ++i) {
2052        ReservedCyclesIndex[i] = NumUnits;
2053        NumUnits += SchedModel->getProcResource(i)->NumUnits;
2054        if (isUnbufferedGroup(i)) {
2055          auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2056          for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2057               U != UE; ++U)
2058            ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2059        }
2060      }
2061  
2062      ReservedCycles.resize(NumUnits, InvalidCycle);
2063    }
2064  }
2065  
2066  /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2067  /// these "soft stalls" differently than the hard stall cycles based on CPU
2068  /// resources and computed by checkHazard(). A fully in-order model
2069  /// (MicroOpBufferSize==0) will not make use of this since instructions are not
2070  /// available for scheduling until they are ready. However, a weaker in-order
2071  /// model may use this for heuristics. For example, if a processor has in-order
2072  /// behavior when reading certain resources, this may come into play.
2073  unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2074    if (!SU->isUnbuffered)
2075      return 0;
2076  
2077    unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2078    if (ReadyCycle > CurrCycle)
2079      return ReadyCycle - CurrCycle;
2080    return 0;
2081  }
2082  
2083  /// Compute the next cycle at which the given processor resource unit
2084  /// can be scheduled.
2085  unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2086                                                         unsigned Cycles) {
2087    unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2088    // If this resource has never been used, always return cycle zero.
2089    if (NextUnreserved == InvalidCycle)
2090      return 0;
2091    // For bottom-up scheduling add the cycles needed for the current operation.
2092    if (!isTop())
2093      NextUnreserved += Cycles;
2094    return NextUnreserved;
2095  }
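
      // For example (illustrative numbers only): if ReservedCycles[InstanceIdx]
      // is 5 and the operation occupies the unit for Cycles=2, a top-down zone
      // reports the unit as next available at cycle 5, while a bottom-up zone
      // reports cycle 7, because the operation's own occupancy is added on top.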
2096  
2097  /// Compute the next cycle at which the given processor resource can be
2098  /// scheduled.  Returns the next cycle and the index of the processor resource
2099  /// instance in the reserved cycles vector.
2100  std::pair<unsigned, unsigned>
2101  SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
2102                                      unsigned Cycles) {
2103  
2104    unsigned MinNextUnreserved = InvalidCycle;
2105    unsigned InstanceIdx = 0;
2106    unsigned StartIndex = ReservedCyclesIndex[PIdx];
2107    unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2108    assert(NumberOfInstances > 0 &&
2109           "Cannot have zero instances of a ProcResource");
2110  
2111    if (isUnbufferedGroup(PIdx)) {
2112      // If any subunits are used by the instruction, report that the resource
2113      // group is available at 0, effectively removing the group record from
2114      // hazarding and basing the hazarding decisions on the subunit records.
2115      // Otherwise, choose the first available instance from among the subunits.
2116      // Specifications which assign cycles to both the subunits and the group or
2117      // which use an unbuffered group with buffered subunits will appear to
2118      // schedule strangely. In the first case, the additional cycles for the
2119      // group will be ignored.  In the second, the group will be ignored
2120      // entirely.
2121      for (const MCWriteProcResEntry &PE :
2122           make_range(SchedModel->getWriteProcResBegin(SC),
2123                      SchedModel->getWriteProcResEnd(SC)))
2124        if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2125          return std::make_pair(0u, StartIndex);
2126  
2127      auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2128      for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2129        unsigned NextUnreserved, NextInstanceIdx;
2130        std::tie(NextUnreserved, NextInstanceIdx) =
2131            getNextResourceCycle(SC, SubUnits[I], Cycles);
2132        if (MinNextUnreserved > NextUnreserved) {
2133          InstanceIdx = NextInstanceIdx;
2134          MinNextUnreserved = NextUnreserved;
2135        }
2136      }
2137      return std::make_pair(MinNextUnreserved, InstanceIdx);
2138    }
2139  
2140    for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2141         ++I) {
2142      unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
2143      if (MinNextUnreserved > NextUnreserved) {
2144        InstanceIdx = I;
2145        MinNextUnreserved = NextUnreserved;
2146      }
2147    }
2148    return std::make_pair(MinNextUnreserved, InstanceIdx);
2149  }
2150  
2151  /// Does this SU have a hazard within the current instruction group.
2152  ///
2153  /// The scheduler supports two modes of hazard recognition. The first is the
2154  /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2155  /// supports highly complicated in-order reservation tables
2156  /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2157  ///
2158  /// The second is a streamlined mechanism that checks for hazards based on
2159  /// simple counters that the scheduler itself maintains. It explicitly checks
2160  /// for instruction dispatch limitations, including the number of micro-ops that
2161  /// can dispatch per cycle.
2162  ///
2163  /// TODO: Also check whether the SU must start a new group.
2164  bool SchedBoundary::checkHazard(SUnit *SU) {
2165    if (HazardRec->isEnabled()
2166        && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2167      return true;
2168    }
2169  
2170    unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2171    if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2172      LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
2173                        << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2174      return true;
2175    }
2176  
2177    if (CurrMOps > 0 &&
2178        ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2179         (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2180      LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
2181                        << (isTop() ? "begin" : "end") << " group\n");
2182      return true;
2183    }
2184  
2185    if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2186      const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2187      for (const MCWriteProcResEntry &PE :
2188            make_range(SchedModel->getWriteProcResBegin(SC),
2189                       SchedModel->getWriteProcResEnd(SC))) {
2190        unsigned ResIdx = PE.ProcResourceIdx;
2191        unsigned Cycles = PE.Cycles;
2192        unsigned NRCycle, InstanceIdx;
2193        std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles);
2194        if (NRCycle > CurrCycle) {
2195  #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2196          MaxObservedStall = std::max(Cycles, MaxObservedStall);
2197  #endif
2198          LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
2199                            << SchedModel->getResourceName(ResIdx)
2200                            << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx]  << ']'
2201                            << "=" << NRCycle << "c\n");
2202          return true;
2203        }
2204      }
2205    }
2206    return false;
2207  }
2208  
2209  // Find the unscheduled node in ReadySUs with the highest latency.
2210  unsigned SchedBoundary::
2211  findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2212    SUnit *LateSU = nullptr;
2213    unsigned RemLatency = 0;
2214    for (SUnit *SU : ReadySUs) {
2215      unsigned L = getUnscheduledLatency(SU);
2216      if (L > RemLatency) {
2217        RemLatency = L;
2218        LateSU = SU;
2219      }
2220    }
2221    if (LateSU) {
2222      LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2223                        << LateSU->NodeNum << ") " << RemLatency << "c\n");
2224    }
2225    return RemLatency;
2226  }
2227  
2228  // Count resources in this zone and the remaining unscheduled
2229  // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2230  // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
2231  unsigned SchedBoundary::
2232  getOtherResourceCount(unsigned &OtherCritIdx) {
2233    OtherCritIdx = 0;
2234    if (!SchedModel->hasInstrSchedModel())
2235      return 0;
2236  
2237    unsigned OtherCritCount = Rem->RemIssueCount
2238      + (RetiredMOps * SchedModel->getMicroOpFactor());
2239    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
2240                      << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2241    for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2242         PIdx != PEnd; ++PIdx) {
2243      unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2244      if (OtherCount > OtherCritCount) {
2245        OtherCritCount = OtherCount;
2246        OtherCritIdx = PIdx;
2247      }
2248    }
2249    if (OtherCritIdx) {
2250      LLVM_DEBUG(
2251          dbgs() << "  " << Available.getName() << " + Remain CritRes: "
2252                 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2253                 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2254    }
2255    return OtherCritCount;
2256  }
2257  
2258  void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2259                                  unsigned Idx) {
2260    assert(SU->getInstr() && "Scheduled SUnit must have instr");
2261  
2262  #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2263    // ReadyCycle was bumped up to the CurrCycle when this node was
2264    // scheduled, but CurrCycle may have been eagerly advanced immediately after
2265    // scheduling, so may now be greater than ReadyCycle.
2266    if (ReadyCycle > CurrCycle)
2267      MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2268  #endif
2269  
2270    if (ReadyCycle < MinReadyCycle)
2271      MinReadyCycle = ReadyCycle;
2272  
2273    // Check for interlocks first. For the purpose of other heuristics, an
2274    // instruction that cannot issue appears as if it's not in the ReadyQueue.
2275    bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2276    bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2277                          checkHazard(SU) || (Available.size() >= ReadyListLimit);
2278  
2279    if (!HazardDetected) {
2280      Available.push(SU);
2281  
2282      if (InPQueue)
2283        Pending.remove(Pending.begin() + Idx);
2284      return;
2285    }
2286  
2287    if (!InPQueue)
2288      Pending.push(SU);
2289  }
2290  
2291  /// Move the boundary of scheduled code by one cycle.
2292  void SchedBoundary::bumpCycle(unsigned NextCycle) {
2293    if (SchedModel->getMicroOpBufferSize() == 0) {
2294      assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2295             "MinReadyCycle uninitialized");
2296      if (MinReadyCycle > NextCycle)
2297        NextCycle = MinReadyCycle;
2298    }
2299    // Update the current micro-ops, which will issue in the next cycle.
2300    unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2301    CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2302  
2303    // Decrement DependentLatency based on the next cycle.
2304    if ((NextCycle - CurrCycle) > DependentLatency)
2305      DependentLatency = 0;
2306    else
2307      DependentLatency -= (NextCycle - CurrCycle);
2308  
2309    if (!HazardRec->isEnabled()) {
2310      // Bypass HazardRec virtual calls.
2311      CurrCycle = NextCycle;
2312    } else {
2313      // Bypass getHazardType calls in case of long latency.
2314      for (; CurrCycle != NextCycle; ++CurrCycle) {
2315        if (isTop())
2316          HazardRec->AdvanceCycle();
2317        else
2318          HazardRec->RecedeCycle();
2319      }
2320    }
2321    CheckPending = true;
2322    IsResourceLimited =
2323        checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2324                           getScheduledLatency(), true);
2325  
2326    LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2327                      << '\n');
2328  }
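
      // For example (illustrative numbers only): with an issue width of 4,
      // advancing from CurrCycle=3 to NextCycle=5 gives DecMOps = 4 * 2 = 8, so
      // CurrMOps drops by up to 8 (saturating at zero) and DependentLatency
      // shrinks by the same two cycles, also saturating at zero.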
2329  
2330  void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2331    ExecutedResCounts[PIdx] += Count;
2332    if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2333      MaxExecutedResCount = ExecutedResCounts[PIdx];
2334  }
2335  
2336  /// Add the given processor resource to this scheduled zone.
2337  ///
2338  /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2339  /// during which this resource is consumed.
2340  ///
2341  /// \return the next cycle at which the instruction may execute without
2342  /// oversubscribing resources.
2343  unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2344                                        unsigned Cycles, unsigned NextCycle) {
2345    unsigned Factor = SchedModel->getResourceFactor(PIdx);
2346    unsigned Count = Factor * Cycles;
2347    LLVM_DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) << " +"
2348                      << Cycles << "x" << Factor << "u\n");
2349  
2350    // Update Executed resources counts.
2351    incExecutedResources(PIdx, Count);
2352    assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2353    Rem->RemainingCounts[PIdx] -= Count;
2354  
2355    // Check if this resource exceeds the current critical resource. If so, it
2356    // becomes the critical resource.
2357    if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2358      ZoneCritResIdx = PIdx;
2359      LLVM_DEBUG(dbgs() << "  *** Critical resource "
2360                        << SchedModel->getResourceName(PIdx) << ": "
2361                        << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2362                        << "c\n");
2363    }
2364    // For reserved resources, record the highest cycle using the resource.
2365    unsigned NextAvailable, InstanceIdx;
2366    std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles);
2367    if (NextAvailable > CurrCycle) {
2368      LLVM_DEBUG(dbgs() << "  Resource conflict: "
2369                        << SchedModel->getResourceName(PIdx)
2370                        << '[' << InstanceIdx - ReservedCyclesIndex[PIdx]  << ']'
2371                        << " reserved until @" << NextAvailable << "\n");
2372    }
2373    return NextAvailable;
2374  }
2375  
2376  /// Move the boundary of scheduled code by one SUnit.
2377  void SchedBoundary::bumpNode(SUnit *SU) {
2378    // Update the reservation table.
2379    if (HazardRec->isEnabled()) {
2380      if (!isTop() && SU->isCall) {
2381        // Calls are scheduled with their preceding instructions. For bottom-up
2382        // scheduling, clear the pipeline state before emitting.
2383        HazardRec->Reset();
2384      }
2385      HazardRec->EmitInstruction(SU);
2386      // Scheduling an instruction may have made pending instructions available.
2387      CheckPending = true;
2388    }
2389    // checkHazard should prevent scheduling multiple instructions per cycle that
2390    // exceed the issue width.
2391    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2392    unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2393    assert(
2394        (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2395        "Cannot schedule this instruction's MicroOps in the current cycle.");
2396  
2397    unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2398    LLVM_DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
2399  
2400    unsigned NextCycle = CurrCycle;
2401    switch (SchedModel->getMicroOpBufferSize()) {
2402    case 0:
2403      assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2404      break;
2405    case 1:
2406      if (ReadyCycle > NextCycle) {
2407        NextCycle = ReadyCycle;
2408        LLVM_DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
2409      }
2410      break;
2411    default:
2412      // We don't currently model the OOO reorder buffer, so consider all
2413      // scheduled MOps to be "retired". We do loosely model in-order resource
2414      // latency. If this instruction uses an in-order resource, account for any
2415      // likely stall cycles.
2416      if (SU->isUnbuffered && ReadyCycle > NextCycle)
2417        NextCycle = ReadyCycle;
2418      break;
2419    }
2420    RetiredMOps += IncMOps;
2421  
2422    // Update resource counts and critical resource.
2423    if (SchedModel->hasInstrSchedModel()) {
2424      unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2425      assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2426      Rem->RemIssueCount -= DecRemIssue;
2427      if (ZoneCritResIdx) {
2428        // Scale scheduled micro-ops for comparing with the critical resource.
2429        unsigned ScaledMOps =
2430          RetiredMOps * SchedModel->getMicroOpFactor();
2431  
2432        // If scaled micro-ops are now more than the previous critical resource by
2433        // a full cycle, then micro-ops issue becomes critical.
2434        if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2435            >= (int)SchedModel->getLatencyFactor()) {
2436          ZoneCritResIdx = 0;
2437          LLVM_DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
2438                            << ScaledMOps / SchedModel->getLatencyFactor()
2439                            << "c\n");
2440        }
2441      }
2442      for (TargetSchedModel::ProcResIter
2443             PI = SchedModel->getWriteProcResBegin(SC),
2444             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2445        unsigned RCycle =
2446          countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle);
2447        if (RCycle > NextCycle)
2448          NextCycle = RCycle;
2449      }
2450      if (SU->hasReservedResource) {
2451        // For reserved resources, record the highest cycle using the resource.
2452        // For top-down scheduling, this is the cycle in which we schedule this
2453      // instruction plus the number of cycles the operation reserves the
2454      // resource. For bottom-up, it is simply the instruction's cycle.
2455        for (TargetSchedModel::ProcResIter
2456               PI = SchedModel->getWriteProcResBegin(SC),
2457               PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2458          unsigned PIdx = PI->ProcResourceIdx;
2459          if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2460            unsigned ReservedUntil, InstanceIdx;
2461            std::tie(ReservedUntil, InstanceIdx) =
2462                getNextResourceCycle(SC, PIdx, 0);
2463            if (isTop()) {
2464              ReservedCycles[InstanceIdx] =
2465                  std::max(ReservedUntil, NextCycle + PI->Cycles);
2466            } else
2467              ReservedCycles[InstanceIdx] = NextCycle;
2468          }
2469        }
2470      }
2471    }
2472    // Update ExpectedLatency and DependentLatency.
2473    unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2474    unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2475    if (SU->getDepth() > TopLatency) {
2476      TopLatency = SU->getDepth();
2477      LLVM_DEBUG(dbgs() << "  " << Available.getName() << " TopLatency SU("
2478                        << SU->NodeNum << ") " << TopLatency << "c\n");
2479    }
2480    if (SU->getHeight() > BotLatency) {
2481      BotLatency = SU->getHeight();
2482      LLVM_DEBUG(dbgs() << "  " << Available.getName() << " BotLatency SU("
2483                        << SU->NodeNum << ") " << BotLatency << "c\n");
2484    }
2485    // If we stall for any reason, bump the cycle.
2486    if (NextCycle > CurrCycle)
2487      bumpCycle(NextCycle);
2488    else
2489      // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2490      // resource limited. If a stall occurred, bumpCycle does this.
2491      IsResourceLimited =
2492          checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2493                             getScheduledLatency(), true);
2494  
2495    // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2496    // resets CurrMOps. Loop to handle instructions with more MOps than can issue
2497    // in one cycle. Since we commonly reach the max MOps here, opportunistically
2498    // bump the cycle to avoid uselessly checking everything in the readyQ.
2499    CurrMOps += IncMOps;
2500  
2501    // Bump the cycle count for issue group constraints.
2502    // This must be done after NextCycle has been adjusted for all other stalls.
2503    // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2504    // CurrCycle to X.
2505    if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||
2506        (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2507      LLVM_DEBUG(dbgs() << "  Bump cycle to " << (isTop() ? "end" : "begin")
2508                        << " group\n");
2509      bumpCycle(++NextCycle);
2510    }
2511  
2512    while (CurrMOps >= SchedModel->getIssueWidth()) {
2513      LLVM_DEBUG(dbgs() << "  *** Max MOps " << CurrMOps << " at cycle "
2514                        << CurrCycle << '\n');
2515      bumpCycle(++NextCycle);
2516    }
2517    LLVM_DEBUG(dumpScheduledState());
2518  }
2519  
2520  /// Release pending ready nodes into the available queue. This makes them
2521  /// visible to heuristics.
2522  void SchedBoundary::releasePending() {
2523    // If the available queue is empty, it is safe to reset MinReadyCycle.
2524    if (Available.empty())
2525      MinReadyCycle = std::numeric_limits<unsigned>::max();
2526  
2527    // Check to see if any of the pending instructions are ready to issue.  If
2528    // so, add them to the available queue.
2529    for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2530      SUnit *SU = *(Pending.begin() + I);
2531      unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2532  
2533      if (ReadyCycle < MinReadyCycle)
2534        MinReadyCycle = ReadyCycle;
2535  
2536      if (Available.size() >= ReadyListLimit)
2537        break;
2538  
2539      releaseNode(SU, ReadyCycle, true, I);
2540      if (E != Pending.size()) {
2541        --I;
2542        --E;
2543      }
2544    }
2545    CheckPending = false;
2546  }
2547  
2548  /// Remove SU from the ready set for this boundary.
2549  void SchedBoundary::removeReady(SUnit *SU) {
2550    if (Available.isInQueue(SU))
2551      Available.remove(Available.find(SU));
2552    else {
2553      assert(Pending.isInQueue(SU) && "bad ready count");
2554      Pending.remove(Pending.find(SU));
2555    }
2556  }
2557  
2558  /// If this queue only has one ready candidate, return it. As a side effect,
2559  /// defer any nodes that now hit a hazard, and advance the cycle until at least
2560  /// one node is ready. If multiple instructions are ready, return NULL.
2561  SUnit *SchedBoundary::pickOnlyChoice() {
2562    if (CheckPending)
2563      releasePending();
2564  
2565    // Defer any ready instrs that now have a hazard.
2566    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2567      if (checkHazard(*I)) {
2568        Pending.push(*I);
2569        I = Available.remove(I);
2570        continue;
2571      }
2572      ++I;
2573    }
2574    for (unsigned i = 0; Available.empty(); ++i) {
2575  //  FIXME: Re-enable assert once PR20057 is resolved.
2576  //    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2577  //           "permanent hazard");
2578      (void)i;
2579      bumpCycle(CurrCycle + 1);
2580      releasePending();
2581    }
2582  
2583    LLVM_DEBUG(Pending.dump());
2584    LLVM_DEBUG(Available.dump());
2585  
2586    if (Available.size() == 1)
2587      return *Available.begin();
2588    return nullptr;
2589  }
2590  
2591  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2592  // This is useful information to dump after bumpNode.
2593  // Note that the Queue contents are more useful before pickNodeFromQueue.
2594  LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2595    unsigned ResFactor;
2596    unsigned ResCount;
2597    if (ZoneCritResIdx) {
2598      ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2599      ResCount = getResourceCount(ZoneCritResIdx);
2600    } else {
2601      ResFactor = SchedModel->getMicroOpFactor();
2602      ResCount = RetiredMOps * ResFactor;
2603    }
2604    unsigned LFactor = SchedModel->getLatencyFactor();
2605    dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2606           << "  Retired: " << RetiredMOps;
2607    dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
2608    dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
2609           << ResCount / ResFactor << " "
2610           << SchedModel->getResourceName(ZoneCritResIdx)
2611           << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
2612           << (IsResourceLimited ? "  - Resource" : "  - Latency")
2613           << " limited.\n";
2614  }
2615  #endif
2616  
2617  //===----------------------------------------------------------------------===//
2618  // GenericScheduler - Generic implementation of MachineSchedStrategy.
2619  //===----------------------------------------------------------------------===//
2620  
2621  void GenericSchedulerBase::SchedCandidate::
2622  initResourceDelta(const ScheduleDAGMI *DAG,
2623                    const TargetSchedModel *SchedModel) {
2624    if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2625      return;
2626  
2627    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2628    for (TargetSchedModel::ProcResIter
2629           PI = SchedModel->getWriteProcResBegin(SC),
2630           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2631      if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2632        ResDelta.CritResources += PI->Cycles;
2633      if (PI->ProcResourceIdx == Policy.DemandResIdx)
2634        ResDelta.DemandedResources += PI->Cycles;
2635    }
2636  }
2637  
2638  /// Compute remaining latency. We need this both to determine whether the
2639  /// overall schedule has become latency-limited and whether the instructions
2640  /// outside this zone are resource or latency limited.
2641  ///
2642  /// The "dependent" latency is updated incrementally during scheduling as the
2643  /// max height/depth of scheduled nodes minus the cycles since it was
2644  /// scheduled:
2645  ///   DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
2646  ///
2647  /// The "independent" latency is the max ready queue depth:
2648  ///   ILat = max N.depth for N in Available|Pending
2649  ///
2650  /// RemainingLatency is the greater of independent and dependent latency.
2651  ///
2652  /// These computations are expensive, especially in DAGs with many edges, so
2653  /// only do them if necessary.
2654  static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2655    unsigned RemLatency = CurrZone.getDependentLatency();
2656    RemLatency = std::max(RemLatency,
2657                          CurrZone.findMaxLatency(CurrZone.Available.elements()));
2658    RemLatency = std::max(RemLatency,
2659                          CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2660    return RemLatency;
2661  }
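
// Worked example (illustrative numbers only): if the zone's dependent latency
// is 7 cycles, the deepest node still in Available has depth 10, and the
// deepest node in Pending has depth 4, then RemLatency = max(7, 10, 4) = 10.
// shouldReduceLatency() below adds the current cycle to this value and
// compares the sum against Rem.CriticalPath to decide whether the remaining
// schedule is latency limited.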
2662  
2663  /// Returns true if the current cycle plus remaining latency is greater than
2664  /// the critical path in the scheduling region.
2665  bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2666                                                 SchedBoundary &CurrZone,
2667                                                 bool ComputeRemLatency,
2668                                                 unsigned &RemLatency) const {
2669    // The current cycle is already greater than the critical path, so we are
2670    // already latency limited and don't need to compute the remaining latency.
2671    if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2672      return true;
2673  
2674    // If we haven't scheduled anything yet, then we aren't latency limited.
2675    if (CurrZone.getCurrCycle() == 0)
2676      return false;
2677  
2678    if (ComputeRemLatency)
2679      RemLatency = computeRemLatency(CurrZone);
2680  
2681    return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2682  }
2683  
2684  /// Set the CandPolicy for a scheduling zone given the current resources and
2685  /// latencies inside and outside the zone.
2686  void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2687                                       SchedBoundary &CurrZone,
2688                                       SchedBoundary *OtherZone) {
2689    // Apply preemptive heuristics based on the total latency and resources
2690    // inside and outside this zone. Potential stalls should be considered before
2691    // following this policy.
2692  
2693    // Compute the critical resource outside the zone.
2694    unsigned OtherCritIdx = 0;
2695    unsigned OtherCount =
2696      OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2697  
2698    bool OtherResLimited = false;
2699    unsigned RemLatency = 0;
2700    bool RemLatencyComputed = false;
2701    if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2702      RemLatency = computeRemLatency(CurrZone);
2703      RemLatencyComputed = true;
2704      OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2705                                           OtherCount, RemLatency, false);
2706    }
2707  
2708    // Schedule aggressively for latency in PostRA mode. We don't check for
2709    // acyclic latency during PostRA, and highly out-of-order processors will
2710    // skip PostRA scheduling.
2711    if (!OtherResLimited &&
2712        (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2713                                         RemLatency))) {
2714      Policy.ReduceLatency = true;
2715      LLVM_DEBUG(dbgs() << "  " << CurrZone.Available.getName()
2716                        << " RemainingLatency " << RemLatency << " + "
2717                        << CurrZone.getCurrCycle() << "c > CritPath "
2718                        << Rem.CriticalPath << "\n");
2719    }
2720    // If the same resource is limiting inside and outside the zone, do nothing.
2721    if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2722      return;
2723  
2724    LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2725      dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
2726             << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2727    } if (OtherResLimited) dbgs()
2728        << "  RemainingLimit: "
2729        << SchedModel->getResourceName(OtherCritIdx) << "\n";
2730    if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2731        << "  Latency limited both directions.\n");
2732  
2733    if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2734      Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2735  
2736    if (OtherResLimited)
2737      Policy.DemandResIdx = OtherCritIdx;
2738  }
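
// Illustrative scenario (the resource names are hypothetical; the fields are
// the ones set above): if the bottom zone is currently limited by an "ALU"
// unit while the unscheduled instructions outside it mostly consume an "LSU"
// unit, this sets Policy.ReduceResIdx to the ALU index and Policy.DemandResIdx
// to the LSU index. GenericScheduler::tryCandidate() then prefers candidates
// whose SchedResourceDelta shows fewer CritResources cycles (ResourceReduce)
// or more DemandedResources cycles (ResourceDemand), as computed by
// SchedCandidate::initResourceDelta() above.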
2739  
2740  #ifndef NDEBUG
2741  const char *GenericSchedulerBase::getReasonStr(
2742    GenericSchedulerBase::CandReason Reason) {
2743    switch (Reason) {
2744    case NoCand:         return "NOCAND    ";
2745    case Only1:          return "ONLY1     ";
2746    case PhysReg:        return "PHYS-REG  ";
2747    case RegExcess:      return "REG-EXCESS";
2748    case RegCritical:    return "REG-CRIT  ";
2749    case Stall:          return "STALL     ";
2750    case Cluster:        return "CLUSTER   ";
2751    case Weak:           return "WEAK      ";
2752    case RegMax:         return "REG-MAX   ";
2753    case ResourceReduce: return "RES-REDUCE";
2754    case ResourceDemand: return "RES-DEMAND";
2755    case TopDepthReduce: return "TOP-DEPTH ";
2756    case TopPathReduce:  return "TOP-PATH  ";
2757    case BotHeightReduce:return "BOT-HEIGHT";
2758    case BotPathReduce:  return "BOT-PATH  ";
2759    case NextDefUse:     return "DEF-USE   ";
2760    case NodeOrder:      return "ORDER     ";
2761    }
2762    llvm_unreachable("Unknown reason!");
2763  }
2764  
2765  void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2766    PressureChange P;
2767    unsigned ResIdx = 0;
2768    unsigned Latency = 0;
2769    switch (Cand.Reason) {
2770    default:
2771      break;
2772    case RegExcess:
2773      P = Cand.RPDelta.Excess;
2774      break;
2775    case RegCritical:
2776      P = Cand.RPDelta.CriticalMax;
2777      break;
2778    case RegMax:
2779      P = Cand.RPDelta.CurrentMax;
2780      break;
2781    case ResourceReduce:
2782      ResIdx = Cand.Policy.ReduceResIdx;
2783      break;
2784    case ResourceDemand:
2785      ResIdx = Cand.Policy.DemandResIdx;
2786      break;
2787    case TopDepthReduce:
2788      Latency = Cand.SU->getDepth();
2789      break;
2790    case TopPathReduce:
2791      Latency = Cand.SU->getHeight();
2792      break;
2793    case BotHeightReduce:
2794      Latency = Cand.SU->getHeight();
2795      break;
2796    case BotPathReduce:
2797      Latency = Cand.SU->getDepth();
2798      break;
2799    }
2800    dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2801    if (P.isValid())
2802      dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2803             << ":" << P.getUnitInc() << " ";
2804    else
2805      dbgs() << "      ";
2806    if (ResIdx)
2807      dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2808    else
2809      dbgs() << "         ";
2810    if (Latency)
2811      dbgs() << " " << Latency << " cycles ";
2812    else
2813      dbgs() << "          ";
2814    dbgs() << '\n';
2815  }
2816  #endif
2817  
2818  namespace llvm {
2819  /// Return true if this heuristic determines order.
2820  /// TODO: Consider refactoring the return type of these functions to an integer
2821  /// or enum, as we may need to differentiate whether TryCand is better than Cand.
2822  bool tryLess(int TryVal, int CandVal,
2823               GenericSchedulerBase::SchedCandidate &TryCand,
2824               GenericSchedulerBase::SchedCandidate &Cand,
2825               GenericSchedulerBase::CandReason Reason) {
2826    if (TryVal < CandVal) {
2827      TryCand.Reason = Reason;
2828      return true;
2829    }
2830    if (TryVal > CandVal) {
2831      if (Cand.Reason > Reason)
2832        Cand.Reason = Reason;
2833      return true;
2834    }
2835    return false;
2836  }
2837  
2838  bool tryGreater(int TryVal, int CandVal,
2839                  GenericSchedulerBase::SchedCandidate &TryCand,
2840                  GenericSchedulerBase::SchedCandidate &Cand,
2841                  GenericSchedulerBase::CandReason Reason) {
2842    if (TryVal > CandVal) {
2843      TryCand.Reason = Reason;
2844      return true;
2845    }
2846    if (TryVal < CandVal) {
2847      if (Cand.Reason > Reason)
2848        Cand.Reason = Reason;
2849      return true;
2850    }
2851    return false;
2852  }
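
// Usage sketch (the values are hypothetical): both helpers return true
// whenever the comparison is decisive in either direction and false only on a
// tie, so callers chain them and fall through until some heuristic decides:
//
//   // TryVal < CandVal:  TryCand wins; TryCand.Reason is set to Reason.
//   // TryVal > CandVal:  Cand wins; Cand.Reason may be improved (lowered).
//   // TryVal == CandVal: tie; fall through to the next heuristic.
//   if (tryLess(TryVal, CandVal, TryCand, Cand, Reason))
//     return TryCand.Reason != NoCand;
//
// The "return TryCand.Reason != NoCand" idiom used by tryCandidate() below
// distinguishes "TryCand is better" from "Cand is better" after a decisive
// comparison.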
2853  
2854  bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2855                  GenericSchedulerBase::SchedCandidate &Cand,
2856                  SchedBoundary &Zone) {
2857    if (Zone.isTop()) {
2858      // Prefer the candidate with the lesser depth, but only if one of them has
2859      // depth greater than the total latency scheduled so far, otherwise either
2860      // of them could be scheduled now with no stall.
2861      if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
2862          Zone.getScheduledLatency()) {
2863        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2864                    TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2865          return true;
2866      }
2867      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2868                     TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2869        return true;
2870    } else {
2871      // Prefer the candidate with the lesser height, but only if one of them has
2872      // height greater than the total latency scheduled so far, otherwise either
2873      // of them could be scheduled now with no stall.
2874      if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
2875          Zone.getScheduledLatency()) {
2876        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2877                    TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2878          return true;
2879      }
2880      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2881                     TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2882        return true;
2883    }
2884    return false;
2885  }
2886  } // end namespace llvm
2887  
2888  static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2889    LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2890                      << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2891  }
2892  
2893  static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2894    tracePick(Cand.Reason, Cand.AtTop);
2895  }
2896  
2897  void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2898    assert(dag->hasVRegLiveness() &&
2899           "(PreRA)GenericScheduler needs vreg liveness");
2900    DAG = static_cast<ScheduleDAGMILive*>(dag);
2901    SchedModel = DAG->getSchedModel();
2902    TRI = DAG->TRI;
2903  
2904    if (RegionPolicy.ComputeDFSResult)
2905      DAG->computeDFSResult();
2906  
2907    Rem.init(DAG, SchedModel);
2908    Top.init(DAG, SchedModel, &Rem);
2909    Bot.init(DAG, SchedModel, &Rem);
2910  
2911    // Initialize resource counts.
2912  
2913    // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2914    // are disabled, then these HazardRecs will be disabled.
2915    const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2916    if (!Top.HazardRec) {
2917      Top.HazardRec =
2918          DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2919              Itin, DAG);
2920    }
2921    if (!Bot.HazardRec) {
2922      Bot.HazardRec =
2923          DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2924              Itin, DAG);
2925    }
2926    TopCand.SU = nullptr;
2927    BotCand.SU = nullptr;
2928  }
2929  
2930  /// Initialize the per-region scheduling policy.
2931  void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2932                                    MachineBasicBlock::iterator End,
2933                                    unsigned NumRegionInstrs) {
2934    const MachineFunction &MF = *Begin->getMF();
2935    const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2936  
2937    // Avoid setting up the register pressure tracker for small regions to save
2938    // compile time. As a rough heuristic, only track pressure when the number of
2939    // schedulable instructions exceeds half the integer register file.
2940    RegionPolicy.ShouldTrackPressure = true;
2941    for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2942      MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2943      if (TLI->isTypeLegal(LegalIntVT)) {
2944        unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2945          TLI->getRegClassFor(LegalIntVT));
2946        RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2947      }
2948    }
2949  
2950    // For generic targets, we default to bottom-up, because it's simpler and more
2951    // compile-time optimizations have been implemented in that direction.
2952    RegionPolicy.OnlyBottomUp = true;
2953  
2954    // Allow the subtarget to override default policy.
2955    MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2956  
2957    // After subtarget overrides, apply command line options.
2958    if (!EnableRegPressure) {
2959      RegionPolicy.ShouldTrackPressure = false;
2960      RegionPolicy.ShouldTrackLaneMasks = false;
2961    }
2962  
2963    // The -misched-topdown/bottomup flags can force or unforce the scheduling
2964    // direction; e.g. -misched-bottomup=false allows scheduling in both directions.
2965    assert((!ForceTopDown || !ForceBottomUp) &&
2966           "-misched-topdown incompatible with -misched-bottomup");
2967    if (ForceBottomUp.getNumOccurrences() > 0) {
2968      RegionPolicy.OnlyBottomUp = ForceBottomUp;
2969      if (RegionPolicy.OnlyBottomUp)
2970        RegionPolicy.OnlyTopDown = false;
2971    }
2972    if (ForceTopDown.getNumOccurrences() > 0) {
2973      RegionPolicy.OnlyTopDown = ForceTopDown;
2974      if (RegionPolicy.OnlyTopDown)
2975        RegionPolicy.OnlyBottomUp = false;
2976    }
2977  }
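
// A subtarget can adjust the defaults chosen above by overriding
// TargetSubtargetInfo::overrideSchedPolicy(), which runs before the
// command-line overrides. Minimal sketch (the subtarget class name is
// hypothetical; the fields are the MachineSchedPolicy members used above):
//
//   void MyTargetSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
//                                               unsigned NumRegionInstrs) const {
//     // Allow bidirectional scheduling and always track register pressure.
//     Policy.OnlyBottomUp = false;
//     Policy.OnlyTopDown = false;
//     Policy.ShouldTrackPressure = true;
//   }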
2978  
2979  void GenericScheduler::dumpPolicy() const {
2980    // Cannot completely remove virtual function even in release mode.
2981  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2982    dbgs() << "GenericScheduler RegionPolicy: "
2983           << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2984           << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2985           << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2986           << "\n";
2987  #endif
2988  }
2989  
2990  /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2991  /// critical path by more cycles than it takes to drain the instruction buffer.
2992  /// We estimate an upper bound on in-flight instructions as:
2993  ///
2994  /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2995  /// InFlightIterations = AcyclicPath / CyclesPerIteration
2996  /// InFlightResources = InFlightIterations * LoopResources
2997  ///
2998  /// TODO: Check execution resources in addition to IssueCount.
2999  void GenericScheduler::checkAcyclicLatency() {
3000    if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3001      return;
3002  
3003    // Scaled number of cycles per loop iteration.
3004    unsigned IterCount =
3005      std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3006               Rem.RemIssueCount);
3007    // Scaled acyclic critical path.
3008    unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3009    // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3010    unsigned InFlightCount =
3011      (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3012    unsigned BufferLimit =
3013      SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3014  
3015    Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3016  
3017    LLVM_DEBUG(
3018        dbgs() << "IssueCycles="
3019               << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
3020               << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3021               << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3022               << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3023               << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3024        if (Rem.IsAcyclicLatencyLimited) dbgs() << "  ACYCLIC LATENCY LIMIT\n");
3025  }
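
// Worked example (illustrative numbers, assuming latency and micro-op factors
// of 1): with a cyclic critical path of 10 cycles, an acyclic critical path of
// 40 cycles, and 20 micro-ops remaining per iteration, IterCount = max(10, 20)
// = 20, AcyclicCount = 40, and InFlightCount = ceil(40 * 20 / 20) = 40. If the
// subtarget's micro-op buffer only holds 32 micro-ops (BufferLimit = 32), the
// loop is flagged acyclic-latency limited, and tryCandidate() then schedules
// aggressively for latency ahead of the normal latency heuristic.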
3026  
3027  void GenericScheduler::registerRoots() {
3028    Rem.CriticalPath = DAG->ExitSU.getDepth();
3029  
3030    // Some roots may not feed into ExitSU. Check all of them just in case.
3031    for (const SUnit *SU : Bot.Available) {
3032      if (SU->getDepth() > Rem.CriticalPath)
3033        Rem.CriticalPath = SU->getDepth();
3034    }
3035    LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3036    if (DumpCriticalPathLength) {
3037      errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3038    }
3039  
3040    if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3041      Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3042      checkAcyclicLatency();
3043    }
3044  }
3045  
3046  namespace llvm {
3047  bool tryPressure(const PressureChange &TryP,
3048                   const PressureChange &CandP,
3049                   GenericSchedulerBase::SchedCandidate &TryCand,
3050                   GenericSchedulerBase::SchedCandidate &Cand,
3051                   GenericSchedulerBase::CandReason Reason,
3052                   const TargetRegisterInfo *TRI,
3053                   const MachineFunction &MF) {
3054    // If one candidate decreases and the other increases, go with it.
3055    // Invalid candidates have UnitInc==0.
3056    if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3057                   Reason)) {
3058      return true;
3059    }
3060    // Do not compare the magnitude of pressure changes between top and bottom
3061    // boundary.
3062    if (Cand.AtTop != TryCand.AtTop)
3063      return false;
3064  
3065    // If both candidates affect the same set in the same boundary, go with the
3066    // smallest increase.
3067    unsigned TryPSet = TryP.getPSetOrMax();
3068    unsigned CandPSet = CandP.getPSetOrMax();
3069    if (TryPSet == CandPSet) {
3070      return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3071                     Reason);
3072    }
3073  
3074    int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3075                                   std::numeric_limits<int>::max();
3076  
3077    int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3078                                     std::numeric_limits<int>::max();
3079  
3080    // If the candidates are decreasing pressure, reverse priority.
3081    if (TryP.getUnitInc() < 0)
3082      std::swap(TryRank, CandRank);
3083    return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3084  }
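
// Worked example (the pressure-set names are hypothetical): if TryCand
// increases "GPR" pressure by 1 while Cand decreases "FPR" pressure by 1, the
// first tryGreater() call above already decides in favor of Cand. If both
// candidates increase the same set, the smaller increase wins via tryLess().
// Only when they touch different sets does the target-provided
// getRegPressureSetScore() ranking break the tie.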
3085  
3086  unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3087    return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3088  }
3089  
3090  /// Minimize physical register live ranges. Regalloc wants them adjacent to
3091  /// their physreg def/use.
3092  ///
3093  /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3094  /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3095  /// with the operation that produces or consumes the physreg. We'll do this when
3096  /// regalloc has support for parallel copies.
3097  int biasPhysReg(const SUnit *SU, bool isTop) {
3098    const MachineInstr *MI = SU->getInstr();
3099  
3100    if (MI->isCopy()) {
3101      unsigned ScheduledOper = isTop ? 1 : 0;
3102      unsigned UnscheduledOper = isTop ? 0 : 1;
3103      // If we have already scheduled the physreg producer/consumer, immediately
3104      // schedule the copy.
3105      if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
3106        return 1;
3107      // If the physreg is at the boundary, defer it. Otherwise schedule it
3108      // immediately to free the dependent. We can hoist the copy later.
3109      bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3110      if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
3111        return AtBoundary ? -1 : 1;
3112    }
3113  
3114    if (MI->isMoveImmediate()) {
3115      // If we have a move immediate and all successors have been assigned, bias
3116      // towards scheduling this later. Make sure all register defs are to
3117      // physical registers.
3118      bool DoBias = true;
3119      for (const MachineOperand &Op : MI->defs()) {
3120        if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
3121          DoBias = false;
3122          break;
3123        }
3124      }
3125  
3126      if (DoBias)
3127        return isTop ? -1 : 1;
3128    }
3129  
3130    return 0;
3131  }
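
// Illustrative example: when scheduling bottom-up (isTop == false), a COPY of
// a vreg into a physical return register only becomes available once its
// physreg consumer has been scheduled, and it then reports +1, so the PhysReg
// heuristic in tryCandidate() pulls the copy right next to that consumer. A
// COPY out of a physreg with no unscheduled predecessors (AtBoundary) reports
// -1 and is deferred toward the region boundary instead. A result of 0 leaves
// the decision to the remaining heuristics.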
3132  } // end namespace llvm
3133  
3134  void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3135                                       bool AtTop,
3136                                       const RegPressureTracker &RPTracker,
3137                                       RegPressureTracker &TempTracker) {
3138    Cand.SU = SU;
3139    Cand.AtTop = AtTop;
3140    if (DAG->isTrackingPressure()) {
3141      if (AtTop) {
3142        TempTracker.getMaxDownwardPressureDelta(
3143          Cand.SU->getInstr(),
3144          Cand.RPDelta,
3145          DAG->getRegionCriticalPSets(),
3146          DAG->getRegPressure().MaxSetPressure);
3147      } else {
3148        if (VerifyScheduling) {
3149          TempTracker.getMaxUpwardPressureDelta(
3150            Cand.SU->getInstr(),
3151            &DAG->getPressureDiff(Cand.SU),
3152            Cand.RPDelta,
3153            DAG->getRegionCriticalPSets(),
3154            DAG->getRegPressure().MaxSetPressure);
3155        } else {
3156          RPTracker.getUpwardPressureDelta(
3157            Cand.SU->getInstr(),
3158            DAG->getPressureDiff(Cand.SU),
3159            Cand.RPDelta,
3160            DAG->getRegionCriticalPSets(),
3161            DAG->getRegPressure().MaxSetPressure);
3162        }
3163      }
3164    }
3165    LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3166               << "  Try  SU(" << Cand.SU->NodeNum << ") "
3167               << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3168               << Cand.RPDelta.Excess.getUnitInc() << "\n");
3169  }
3170  
3171  /// Apply a set of heuristics to a new candidate. Heuristics are currently
3172  /// hierarchical. This may be more efficient than a graduated cost model because
3173  /// we don't need to evaluate all aspects of the model for each node in the
3174  /// queue. But it's really done to make the heuristics easier to debug and
3175  /// statistically analyze.
3176  ///
3177  /// \param Cand provides the policy and current best candidate.
3178  /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3179  /// \param Zone describes the scheduled zone that we are extending, or nullptr
3180  ///             if Cand is from a different zone than TryCand.
3181  /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3182  bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3183                                      SchedCandidate &TryCand,
3184                                      SchedBoundary *Zone) const {
3185    // Initialize the candidate if needed.
3186    if (!Cand.isValid()) {
3187      TryCand.Reason = NodeOrder;
3188      return true;
3189    }
3190  
3191    // Bias PhysReg defs and copies toward their uses and defs, respectively.
3192    if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3193                   biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3194      return TryCand.Reason != NoCand;
3195  
3196    // Avoid exceeding the target's limit.
3197    if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3198                                                 Cand.RPDelta.Excess,
3199                                                 TryCand, Cand, RegExcess, TRI,
3200                                                 DAG->MF))
3201      return TryCand.Reason != NoCand;
3202  
3203    // Avoid increasing the max critical pressure in the scheduled region.
3204    if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3205                                                 Cand.RPDelta.CriticalMax,
3206                                                 TryCand, Cand, RegCritical, TRI,
3207                                                 DAG->MF))
3208      return TryCand.Reason != NoCand;
3209  
3210    // We only compare a subset of features when comparing nodes between
3211    // Top and Bottom boundary. Some properties are simply incomparable; in many
3212    // other instances we should only override the other boundary if something
3213    // is a clearly good pick on one boundary. Skip heuristics that are more
3214    // "tie-breaking" in nature.
3215    bool SameBoundary = Zone != nullptr;
3216    if (SameBoundary) {
3217      // For loops that are acyclic path limited, aggressively schedule for
3218      // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3219      // heuristics to take precedence.
3220      if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3221          tryLatency(TryCand, Cand, *Zone))
3222        return TryCand.Reason != NoCand;
3223  
3224      // Prioritize instructions that read unbuffered resources by stall cycles.
3225      if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3226                  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3227        return TryCand.Reason != NoCand;
3228    }
3229  
3230    // Keep clustered nodes together to encourage downstream peephole
3231    // optimizations which may reduce resource requirements.
3232    //
3233    // This is a best effort to set things up for a post-RA pass. Optimizations
3234    // like generating loads of multiple registers should ideally be done within
3235    // the scheduler pass by combining the loads during DAG postprocessing.
3236    const SUnit *CandNextClusterSU =
3237      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3238    const SUnit *TryCandNextClusterSU =
3239      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3240    if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3241                   Cand.SU == CandNextClusterSU,
3242                   TryCand, Cand, Cluster))
3243      return TryCand.Reason != NoCand;
3244  
3245    if (SameBoundary) {
3246      // Weak edges are for clustering and other constraints.
3247      if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3248                  getWeakLeft(Cand.SU, Cand.AtTop),
3249                  TryCand, Cand, Weak))
3250        return TryCand.Reason != NoCand;
3251    }
3252  
3253    // Avoid increasing the max pressure of the entire region.
3254    if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3255                                                 Cand.RPDelta.CurrentMax,
3256                                                 TryCand, Cand, RegMax, TRI,
3257                                                 DAG->MF))
3258      return TryCand.Reason != NoCand;
3259  
3260    if (SameBoundary) {
3261      // Avoid critical resource consumption and balance the schedule.
3262      TryCand.initResourceDelta(DAG, SchedModel);
3263      if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3264                  TryCand, Cand, ResourceReduce))
3265        return TryCand.Reason != NoCand;
3266      if (tryGreater(TryCand.ResDelta.DemandedResources,
3267                     Cand.ResDelta.DemandedResources,
3268                     TryCand, Cand, ResourceDemand))
3269        return TryCand.Reason != NoCand;
3270  
3271      // Avoid serializing long latency dependence chains.
3272      // For acyclic path limited loops, latency was already checked above.
3273      if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3274          !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3275        return TryCand.Reason != NoCand;
3276  
3277      // Fall through to original instruction order.
3278      if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3279          || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3280        TryCand.Reason = NodeOrder;
3281        return true;
3282      }
3283    }
3284  
3285    return false;
3286  }
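
// For reference, the hierarchy applied above is, from highest to lowest
// priority: PhysReg, RegExcess, RegCritical, {acyclic latency, Stall},
// Cluster, {Weak}, RegMax, {ResourceReduce, ResourceDemand, latency,
// NodeOrder}. The heuristics in braces only apply when both candidates come
// from the same boundary (Zone != nullptr); the rest are also used when
// pickNodeBidirectional() compares the best top candidate against the best
// bottom candidate.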
3287  
3288  /// Pick the best candidate from the queue.
3289  ///
3290  /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3291  /// DAG building. To adjust for the current scheduling location we need to
3292  /// maintain the number of vreg uses remaining to be top-scheduled.
3293  void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3294                                           const CandPolicy &ZonePolicy,
3295                                           const RegPressureTracker &RPTracker,
3296                                           SchedCandidate &Cand) {
3297    // getMaxPressureDelta temporarily modifies the tracker.
3298    RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3299  
3300    ReadyQueue &Q = Zone.Available;
3301    for (SUnit *SU : Q) {
3302  
3303      SchedCandidate TryCand(ZonePolicy);
3304      initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3305      // Pass SchedBoundary only when comparing nodes from the same boundary.
3306      SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3307      if (tryCandidate(Cand, TryCand, ZoneArg)) {
3308        // Initialize resource delta if needed in case future heuristics query it.
3309        if (TryCand.ResDelta == SchedResourceDelta())
3310          TryCand.initResourceDelta(DAG, SchedModel);
3311        Cand.setBest(TryCand);
3312        LLVM_DEBUG(traceCandidate(Cand));
3313      }
3314    }
3315  }
3316  
3317  /// Pick the best candidate node from either the top or bottom queue.
3318  SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3319    // Schedule as far as possible in the direction of no choice. This is most
3320    // efficient, but also provides the best heuristics for CriticalPSets.
3321    if (SUnit *SU = Bot.pickOnlyChoice()) {
3322      IsTopNode = false;
3323      tracePick(Only1, false);
3324      return SU;
3325    }
3326    if (SUnit *SU = Top.pickOnlyChoice()) {
3327      IsTopNode = true;
3328      tracePick(Only1, true);
3329      return SU;
3330    }
3331    // Set the bottom-up policy based on the state of the current bottom zone and
3332    // the instructions outside the zone, including the top zone.
3333    CandPolicy BotPolicy;
3334    setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3335    // Set the top-down policy based on the state of the current top zone and
3336    // the instructions outside the zone, including the bottom zone.
3337    CandPolicy TopPolicy;
3338    setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3339  
3340    // See if BotCand is still valid (because we previously scheduled from Top).
3341    LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3342    if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3343        BotCand.Policy != BotPolicy) {
3344      BotCand.reset(CandPolicy());
3345      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3346      assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3347    } else {
3348      LLVM_DEBUG(traceCandidate(BotCand));
3349  #ifndef NDEBUG
3350      if (VerifyScheduling) {
3351        SchedCandidate TCand;
3352        TCand.reset(CandPolicy());
3353        pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3354        assert(TCand.SU == BotCand.SU &&
3355               "Last pick result should correspond to re-picking right now");
3356      }
3357  #endif
3358    }
3359  
3360    // Check if the top Q has a better candidate.
3361    LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3362    if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3363        TopCand.Policy != TopPolicy) {
3364      TopCand.reset(CandPolicy());
3365      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3366      assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3367    } else {
3368      LLVM_DEBUG(traceCandidate(TopCand));
3369  #ifndef NDEBUG
3370      if (VerifyScheduling) {
3371        SchedCandidate TCand;
3372        TCand.reset(CandPolicy());
3373        pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3374        assert(TCand.SU == TopCand.SU &&
3375             "Last pick result should correspond to re-picking right now");
3376      }
3377  #endif
3378    }
3379  
3380    // Pick best from BotCand and TopCand.
3381    assert(BotCand.isValid());
3382    assert(TopCand.isValid());
3383    SchedCandidate Cand = BotCand;
3384    TopCand.Reason = NoCand;
3385    if (tryCandidate(Cand, TopCand, nullptr)) {
3386      Cand.setBest(TopCand);
3387      LLVM_DEBUG(traceCandidate(Cand));
3388    }
3389  
3390    IsTopNode = Cand.AtTop;
3391    tracePick(Cand);
3392    return Cand.SU;
3393  }
3394  
3395  /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3396  SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3397    if (DAG->top() == DAG->bottom()) {
3398      assert(Top.Available.empty() && Top.Pending.empty() &&
3399             Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3400      return nullptr;
3401    }
3402    SUnit *SU;
3403    do {
3404      if (RegionPolicy.OnlyTopDown) {
3405        SU = Top.pickOnlyChoice();
3406        if (!SU) {
3407          CandPolicy NoPolicy;
3408          TopCand.reset(NoPolicy);
3409          pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3410          assert(TopCand.Reason != NoCand && "failed to find a candidate");
3411          tracePick(TopCand);
3412          SU = TopCand.SU;
3413        }
3414        IsTopNode = true;
3415      } else if (RegionPolicy.OnlyBottomUp) {
3416        SU = Bot.pickOnlyChoice();
3417        if (!SU) {
3418          CandPolicy NoPolicy;
3419          BotCand.reset(NoPolicy);
3420          pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3421          assert(BotCand.Reason != NoCand && "failed to find a candidate");
3422          tracePick(BotCand);
3423          SU = BotCand.SU;
3424        }
3425        IsTopNode = false;
3426      } else {
3427        SU = pickNodeBidirectional(IsTopNode);
3428      }
3429    } while (SU->isScheduled);
3430  
3431    if (SU->isTopReady())
3432      Top.removeReady(SU);
3433    if (SU->isBottomReady())
3434      Bot.removeReady(SU);
3435  
3436    LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3437                      << *SU->getInstr());
3438    return SU;
3439  }
3440  
3441  void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3442    MachineBasicBlock::iterator InsertPos = SU->getInstr();
3443    if (!isTop)
3444      ++InsertPos;
3445    SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3446  
3447    // Find already scheduled copies with a single physreg dependence and move
3448    // them just above the scheduled instruction.
3449    for (SDep &Dep : Deps) {
3450      if (Dep.getKind() != SDep::Data ||
3451          !Register::isPhysicalRegister(Dep.getReg()))
3452        continue;
3453      SUnit *DepSU = Dep.getSUnit();
3454      if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3455        continue;
3456      MachineInstr *Copy = DepSU->getInstr();
3457      if (!Copy->isCopy() && !Copy->isMoveImmediate())
3458        continue;
3459      LLVM_DEBUG(dbgs() << "  Rescheduling physreg copy ";
3460                 DAG->dumpNode(*Dep.getSUnit()));
3461      DAG->moveInstruction(Copy, InsertPos);
3462    }
3463  }
3464  
3465  /// Update the scheduler's state after scheduling a node. This is the same node
3466  /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3467  /// update its state based on the current cycle before MachineSchedStrategy
3468  /// does.
3469  ///
3470  /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3471  /// them here. See comments in biasPhysReg.
3472  void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3473    if (IsTopNode) {
3474      SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3475      Top.bumpNode(SU);
3476      if (SU->hasPhysRegUses)
3477        reschedulePhysReg(SU, true);
3478    } else {
3479      SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3480      Bot.bumpNode(SU);
3481      if (SU->hasPhysRegDefs)
3482        reschedulePhysReg(SU, false);
3483    }
3484  }
3485  
3486  /// Create the standard converging machine scheduler. This will be used as the
3487  /// default scheduler if the target does not set a default.
3488  ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3489    ScheduleDAGMILive *DAG =
3490        new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3491    // Register DAG post-processors.
3492    //
3493    // FIXME: extend the mutation API to allow earlier mutations to instantiate
3494    // data and pass it to later mutations. Have a single mutation that gathers
3495    // the interesting nodes in one pass.
3496    DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3497    return DAG;
3498  }
3499  
3500  static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3501    return createGenericSchedLive(C);
3502  }
3503  
3504  static MachineSchedRegistry
3505  GenericSchedRegistry("converge", "Standard converging scheduler.",
3506                       createConvergingSched);
3507  
3508  //===----------------------------------------------------------------------===//
3509  // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3510  //===----------------------------------------------------------------------===//
3511  
3512  void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3513    DAG = Dag;
3514    SchedModel = DAG->getSchedModel();
3515    TRI = DAG->TRI;
3516  
3517    Rem.init(DAG, SchedModel);
3518    Top.init(DAG, SchedModel, &Rem);
3519    BotRoots.clear();
3520  
3521    // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3522    // or are disabled, then these HazardRecs will be disabled.
3523    const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3524    if (!Top.HazardRec) {
3525      Top.HazardRec =
3526          DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3527              Itin, DAG);
3528    }
3529  }
3530  
3531  void PostGenericScheduler::registerRoots() {
3532    Rem.CriticalPath = DAG->ExitSU.getDepth();
3533  
3534    // Some roots may not feed into ExitSU. Check all of them just in case.
3535    for (const SUnit *SU : BotRoots) {
3536      if (SU->getDepth() > Rem.CriticalPath)
3537        Rem.CriticalPath = SU->getDepth();
3538    }
3539    LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3540    if (DumpCriticalPathLength) {
3541      errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3542    }
3543  }
3544  
3545  /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3546  ///
3547  /// \param Cand provides the policy and current best candidate.
3548  /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3549  /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3550  bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3551                                          SchedCandidate &TryCand) {
3552    // Initialize the candidate if needed.
3553    if (!Cand.isValid()) {
3554      TryCand.Reason = NodeOrder;
3555      return true;
3556    }
3557  
3558    // Prioritize instructions that read unbuffered resources by stall cycles.
3559    if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3560                Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3561      return TryCand.Reason != NoCand;
3562  
3563    // Keep clustered nodes together.
3564    if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3565                   Cand.SU == DAG->getNextClusterSucc(),
3566                   TryCand, Cand, Cluster))
3567      return TryCand.Reason != NoCand;
3568  
3569    // Avoid critical resource consumption and balance the schedule.
3570    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3571                TryCand, Cand, ResourceReduce))
3572      return TryCand.Reason != NoCand;
3573    if (tryGreater(TryCand.ResDelta.DemandedResources,
3574                   Cand.ResDelta.DemandedResources,
3575                   TryCand, Cand, ResourceDemand))
3576      return TryCand.Reason != NoCand;
3577  
3578    // Avoid serializing long latency dependence chains.
3579    if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3580      return TryCand.Reason != NoCand;
3581    }
3582  
3583    // Fall through to original instruction order.
3584    if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3585      TryCand.Reason = NodeOrder;
3586      return true;
3587    }
3588  
3589    return false;
3590  }
3591  
3592  void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3593    ReadyQueue &Q = Top.Available;
3594    for (SUnit *SU : Q) {
3595      SchedCandidate TryCand(Cand.Policy);
3596      TryCand.SU = SU;
3597      TryCand.AtTop = true;
3598      TryCand.initResourceDelta(DAG, SchedModel);
3599      if (tryCandidate(Cand, TryCand)) {
3600        Cand.setBest(TryCand);
3601        LLVM_DEBUG(traceCandidate(Cand));
3602      }
3603    }
3604  }
3605  
3606  /// Pick the next node to schedule.
3607  SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3608    if (DAG->top() == DAG->bottom()) {
3609      assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3610      return nullptr;
3611    }
3612    SUnit *SU;
3613    do {
3614      SU = Top.pickOnlyChoice();
3615      if (SU) {
3616        tracePick(Only1, true);
3617      } else {
3618        CandPolicy NoPolicy;
3619        SchedCandidate TopCand(NoPolicy);
3620        // Set the top-down policy based on the state of the current top zone and
3621        // the instructions outside the zone, including the bottom zone.
3622        setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3623        pickNodeFromQueue(TopCand);
3624        assert(TopCand.Reason != NoCand && "failed to find a candidate");
3625        tracePick(TopCand);
3626        SU = TopCand.SU;
3627      }
3628    } while (SU->isScheduled);
3629  
3630    IsTopNode = true;
3631    Top.removeReady(SU);
3632  
3633    LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3634                      << *SU->getInstr());
3635    return SU;
3636  }
3637  
3638  /// Called after ScheduleDAGMI has scheduled an instruction and updated
3639  /// scheduled/remaining flags in the DAG nodes.
3640  void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3641    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3642    Top.bumpNode(SU);
3643  }
3644  
3645  ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3646    return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
3647                             /*RemoveKillFlags=*/true);
3648  }
3649  
3650  //===----------------------------------------------------------------------===//
3651  // ILP Scheduler. Currently for experimental analysis of heuristics.
3652  //===----------------------------------------------------------------------===//
3653  
3654  namespace {
3655  
3656  /// Order nodes by the ILP metric.
3657  struct ILPOrder {
3658    const SchedDFSResult *DFSResult = nullptr;
3659    const BitVector *ScheduledTrees = nullptr;
3660    bool MaximizeILP;
3661  
3662    ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3663  
3664    /// Apply a less-than relation on node priority.
3665    ///
3666    /// (Return true if A comes after B in the Q.)
3667    bool operator()(const SUnit *A, const SUnit *B) const {
3668      unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3669      unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3670      if (SchedTreeA != SchedTreeB) {
3671        // Unscheduled trees have lower priority.
3672        if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3673          return ScheduledTrees->test(SchedTreeB);
3674  
3675        // Trees with shallower connections have lower priority.
3676        if (DFSResult->getSubtreeLevel(SchedTreeA)
3677            != DFSResult->getSubtreeLevel(SchedTreeB)) {
3678          return DFSResult->getSubtreeLevel(SchedTreeA)
3679            < DFSResult->getSubtreeLevel(SchedTreeB);
3680        }
3681      }
3682      if (MaximizeILP)
3683        return DFSResult->getILP(A) < DFSResult->getILP(B);
3684      else
3685        return DFSResult->getILP(A) > DFSResult->getILP(B);
3686    }
3687  };
3688  
3689  /// Schedule based on the ILP metric.
3690  class ILPScheduler : public MachineSchedStrategy {
3691    ScheduleDAGMILive *DAG = nullptr;
3692    ILPOrder Cmp;
3693  
3694    std::vector<SUnit*> ReadyQ;
3695  
3696  public:
3697    ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3698  
3699    void initialize(ScheduleDAGMI *dag) override {
3700      assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3701      DAG = static_cast<ScheduleDAGMILive*>(dag);
3702      DAG->computeDFSResult();
3703      Cmp.DFSResult = DAG->getDFSResult();
3704      Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3705      ReadyQ.clear();
3706    }
3707  
3708    void registerRoots() override {
3709      // Restore the heap in ReadyQ with the updated DFS results.
3710      std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3711    }
3712  
3713    /// Implement MachineSchedStrategy interface.
3714    /// -----------------------------------------
3715  
3716    /// Callback to select the highest priority node from the ready Q.
3717    SUnit *pickNode(bool &IsTopNode) override {
3718      if (ReadyQ.empty()) return nullptr;
3719      std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3720      SUnit *SU = ReadyQ.back();
3721      ReadyQ.pop_back();
3722      IsTopNode = false;
3723      LLVM_DEBUG(dbgs() << "Pick node "
3724                        << "SU(" << SU->NodeNum << ") "
3725                        << " ILP: " << DAG->getDFSResult()->getILP(SU)
3726                        << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3727                        << " @"
3728                        << DAG->getDFSResult()->getSubtreeLevel(
3729                               DAG->getDFSResult()->getSubtreeID(SU))
3730                        << '\n'
3731                        << "Scheduling " << *SU->getInstr());
3732      return SU;
3733    }
3734  
3735    /// Scheduler callback to notify that a new subtree is scheduled.
3736    void scheduleTree(unsigned SubtreeID) override {
3737      std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3738    }
3739  
3740    /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3741    /// DFSResults, and resort the priority Q.
3742    void schedNode(SUnit *SU, bool IsTopNode) override {
3743      assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3744    }
3745  
3746    void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3747  
3748    void releaseBottomNode(SUnit *SU) override {
3749      ReadyQ.push_back(SU);
3750      std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3751    }
3752  };
3753  
3754  } // end anonymous namespace
3755  
3756  static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3757    return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
3758  }
3759  static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3760    return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
3761  }
3762  
3763  static MachineSchedRegistry ILPMaxRegistry(
3764    "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3765  static MachineSchedRegistry ILPMinRegistry(
3766    "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
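
// These registrations make the experimental strategies selectable by name,
// like the "converge" and "shuffle" registrations in this file. Assuming the
// hidden -misched=<name> option wired to MachineSchedRegistry elsewhere in
// this file, a typical invocation would look like:
//
//   llc -misched=ilpmax input.ll
//   llc -misched=ilpmin input.ll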
3767  
3768  //===----------------------------------------------------------------------===//
3769  // Machine Instruction Shuffler for Correctness Testing
3770  //===----------------------------------------------------------------------===//
3771  
3772  #ifndef NDEBUG
3773  namespace {
3774  
3775  /// Apply a less-than relation on the node order, which corresponds to the
3776  /// instruction order prior to scheduling. IsReverse implements greater-than.
3777  template<bool IsReverse>
3778  struct SUnitOrder {
3779    bool operator()(SUnit *A, SUnit *B) const {
3780      if (IsReverse)
3781        return A->NodeNum > B->NodeNum;
3782      else
3783        return A->NodeNum < B->NodeNum;
3784    }
3785  };
3786  
3787  /// Reorder instructions as much as possible.
3788  class InstructionShuffler : public MachineSchedStrategy {
3789    bool IsAlternating;
3790    bool IsTopDown;
3791  
3792    // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3793    // gives nodes with a higher number higher priority, causing the latest
3794    // instructions to be scheduled first.
3795    PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3796      TopQ;
3797  
3798    // When scheduling bottom-up, use greater-than as the queue priority.
3799    PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3800      BottomQ;
3801  
3802  public:
3803    InstructionShuffler(bool alternate, bool topdown)
3804      : IsAlternating(alternate), IsTopDown(topdown) {}
3805  
3806    void initialize(ScheduleDAGMI*) override {
3807      TopQ.clear();
3808      BottomQ.clear();
3809    }
3810  
3811    /// Implement MachineSchedStrategy interface.
3812    /// -----------------------------------------
3813  
3814    SUnit *pickNode(bool &IsTopNode) override {
3815      SUnit *SU;
3816      if (IsTopDown) {
3817        do {
3818          if (TopQ.empty()) return nullptr;
3819          SU = TopQ.top();
3820          TopQ.pop();
3821        } while (SU->isScheduled);
3822        IsTopNode = true;
3823      } else {
3824        do {
3825          if (BottomQ.empty()) return nullptr;
3826          SU = BottomQ.top();
3827          BottomQ.pop();
3828        } while (SU->isScheduled);
3829        IsTopNode = false;
3830      }
3831      if (IsAlternating)
3832        IsTopDown = !IsTopDown;
3833      return SU;
3834    }
3835  
3836    void schedNode(SUnit *SU, bool IsTopNode) override {}
3837  
3838    void releaseTopNode(SUnit *SU) override {
3839      TopQ.push(SU);
3840    }
3841    void releaseBottomNode(SUnit *SU) override {
3842      BottomQ.push(SU);
3843    }
3844  };
3845  
3846  } // end anonymous namespace
3847  
3848  static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3849    bool Alternate = !ForceTopDown && !ForceBottomUp;
3850    bool TopDown = !ForceBottomUp;
3851    assert((TopDown || !ForceTopDown) &&
3852           "-misched-topdown incompatible with -misched-bottomup");
3853    return new ScheduleDAGMILive(
3854        C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
3855  }
3856  
3857  static MachineSchedRegistry ShufflerRegistry(
3858    "shuffle", "Shuffle machine instructions alternating directions",
3859    createInstructionShuffler);
3860  #endif // !NDEBUG
3861  
3862  //===----------------------------------------------------------------------===//
3863  // GraphWriter support for ScheduleDAGMILive.
3864  //===----------------------------------------------------------------------===//
3865  
3866  #ifndef NDEBUG
3867  namespace llvm {
3868  
3869  template<> struct GraphTraits<
3870    ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3871  
3872  template<>
3873  struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3874    DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3875  
3876    static std::string getGraphName(const ScheduleDAG *G) {
3877      return std::string(G->MF.getName());
3878    }
3879  
3880    static bool renderGraphFromBottomUp() {
3881      return true;
3882    }
3883  
3884    static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
3885      if (ViewMISchedCutoff == 0)
3886        return false;
3887      return (Node->Preds.size() > ViewMISchedCutoff
3888           || Node->Succs.size() > ViewMISchedCutoff);
3889    }
3890  
3891    /// If you want to override the dot attributes printed for a particular
3892    /// edge, override this method.
3893    static std::string getEdgeAttributes(const SUnit *Node,
3894                                         SUnitIterator EI,
3895                                         const ScheduleDAG *Graph) {
3896      if (EI.isArtificialDep())
3897        return "color=cyan,style=dashed";
3898      if (EI.isCtrlDep())
3899        return "color=blue,style=dashed";
3900      return "";
3901    }
3902  
3903    static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3904      std::string Str;
3905      raw_string_ostream SS(Str);
3906      const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3907      const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3908        static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3909      SS << "SU:" << SU->NodeNum;
3910      if (DFS)
3911        SS << " I:" << DFS->getNumInstrs(SU);
3912      return SS.str();
3913    }
3914  
3915    static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3916      return G->getGraphNodeLabel(SU);
3917    }
3918  
3919    static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3920      std::string Str("shape=Mrecord");
3921      const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3922      const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3923        static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3924      if (DFS) {
3925        Str += ",style=filled,fillcolor=\"#";
3926        Str += DOT::getColorString(DFS->getSubtreeID(N));
3927        Str += '"';
3928      }
3929      return Str;
3930    }
3931  };
3932  
3933  } // end namespace llvm
3934  #endif // NDEBUG
3935  
3936  /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3937  /// rendered using 'dot'.
3938  void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3939  #ifndef NDEBUG
3940    ViewGraph(this, Name, false, Title);
3941  #else
3942    errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3943           << "systems with Graphviz or gv!\n";
3944  #endif  // NDEBUG
3945  }
3946  
3947  /// Out-of-line implementation with no arguments is handy for gdb.
3948  void ScheduleDAGMI::viewGraph() {
3949    viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3950  }
3951