xref: /freebsd/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10 //
11 // Software pipelining (SWP) is an instruction scheduling technique for loops
12 // that overlap loop iterations and exploits ILP via a compiler transformation.
13 //
14 // Swing Modulo Scheduling is an implementation of software pipelining
15 // that generates schedules that are near optimal in terms of initiation
16 // interval, register requirements, and stage count. See the papers:
17 //
18 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
19 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
20 // Conference on Parallel Architectures and Compilation Techiniques.
21 //
22 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
23 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
24 // Transactions on Computers, Vol. 50, No. 3, 2001.
25 //
26 // "An Implementation of Swing Modulo Scheduling With Extensions for
27 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
28 // Urbana-Champaign, 2005.
29 //
30 //
31 // The SMS algorithm consists of three main steps after computing the minimal
32 // initiation interval (MII).
33 // 1) Analyze the dependence graph and compute information about each
34 //    instruction in the graph.
35 // 2) Order the nodes (instructions) by priority based upon the heuristics
36 //    described in the algorithm.
37 // 3) Attempt to schedule the nodes in the specified order using the MII.
38 //
39 //===----------------------------------------------------------------------===//
40 #ifndef LLVM_CODEGEN_MACHINEPIPELINER_H
41 #define LLVM_CODEGEN_MACHINEPIPELINER_H
42 
43 #include "llvm/ADT/STLExtras.h"
44 #include "llvm/ADT/SetVector.h"
45 #include "llvm/CodeGen/DFAPacketizer.h"
46 #include "llvm/CodeGen/MachineDominators.h"
47 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
48 #include "llvm/CodeGen/MachineScheduler.h"
49 #include "llvm/CodeGen/RegisterClassInfo.h"
50 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
51 #include "llvm/CodeGen/ScheduleDAGMutation.h"
52 #include "llvm/CodeGen/TargetInstrInfo.h"
53 #include "llvm/CodeGen/WindowScheduler.h"
54 #include "llvm/InitializePasses.h"
55 
56 #include <deque>
57 
58 namespace llvm {
59 
60 class AAResults;
61 class NodeSet;
62 class SMSchedule;
63 
64 extern cl::opt<bool> SwpEnableCopyToPhi;
65 extern cl::opt<int> SwpForceIssueWidth;
66 
67 /// The main class in the implementation of the target independent
68 /// software pipeliner pass.
69 class MachinePipeliner : public MachineFunctionPass {
70 public:
71   MachineFunction *MF = nullptr;
72   MachineOptimizationRemarkEmitter *ORE = nullptr;
73   const MachineLoopInfo *MLI = nullptr;
74   const MachineDominatorTree *MDT = nullptr;
75   const InstrItineraryData *InstrItins = nullptr;
76   const TargetInstrInfo *TII = nullptr;
77   RegisterClassInfo RegClassInfo;
78   bool disabledByPragma = false;
79   unsigned II_setByPragma = 0;
80 
81 #ifndef NDEBUG
82   static int NumTries;
83 #endif
84 
85   /// Cache the target analysis information about the loop.
86   struct LoopInfo {
87     MachineBasicBlock *TBB = nullptr;
88     MachineBasicBlock *FBB = nullptr;
89     SmallVector<MachineOperand, 4> BrCond;
90     MachineInstr *LoopInductionVar = nullptr;
91     MachineInstr *LoopCompare = nullptr;
92     std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo =
93         nullptr;
94   };
95   LoopInfo LI;
96 
97   static char ID;
98 
MachinePipeliner()99   MachinePipeliner() : MachineFunctionPass(ID) {
100     initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
101   }
102 
103   bool runOnMachineFunction(MachineFunction &MF) override;
104 
105   void getAnalysisUsage(AnalysisUsage &AU) const override;
106 
107 private:
108   void preprocessPhiNodes(MachineBasicBlock &B);
109   bool canPipelineLoop(MachineLoop &L);
110   bool scheduleLoop(MachineLoop &L);
111   bool swingModuloScheduler(MachineLoop &L);
112   void setPragmaPipelineOptions(MachineLoop &L);
113   bool runWindowScheduler(MachineLoop &L);
114   bool useSwingModuloScheduler();
115   bool useWindowScheduler(bool Changed);
116 };
117 
118 /// Represents a dependence between two instruction.
119 class SwingSchedulerDDGEdge {
120   SUnit *Dst = nullptr;
121   SDep Pred;
122   unsigned Distance = 0;
123   bool IsValidationOnly = false;
124 
125 public:
126   /// Creates an edge corresponding to an edge represented by \p PredOrSucc and
127   /// \p Dep in the original DAG. This pair has no information about the
128   /// direction of the edge, so we need to pass an additional argument \p
129   /// IsSucc.
SwingSchedulerDDGEdge(SUnit * PredOrSucc,const SDep & Dep,bool IsSucc,bool IsValidationOnly)130   SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc,
131                         bool IsValidationOnly)
132       : Dst(PredOrSucc), Pred(Dep), Distance(0u),
133         IsValidationOnly(IsValidationOnly) {
134     SUnit *Src = Dep.getSUnit();
135 
136     if (IsSucc) {
137       std::swap(Src, Dst);
138       Pred.setSUnit(Src);
139     }
140 
141     // An anti-dependence to PHI means loop-carried dependence.
142     if (Pred.getKind() == SDep::Anti && Src->getInstr()->isPHI()) {
143       Distance = 1;
144       std::swap(Src, Dst);
145       auto Reg = Pred.getReg();
146       Pred = SDep(Src, SDep::Kind::Data, Reg);
147     }
148   }
149 
150   /// Returns the SUnit from which the edge comes (source node).
getSrc()151   SUnit *getSrc() const { return Pred.getSUnit(); }
152 
153   /// Returns the SUnit to which the edge points (destination node).
getDst()154   SUnit *getDst() const { return Dst; }
155 
156   /// Returns the latency value for the edge.
getLatency()157   unsigned getLatency() const { return Pred.getLatency(); }
158 
159   /// Sets the latency for the edge.
setLatency(unsigned Latency)160   void setLatency(unsigned Latency) { Pred.setLatency(Latency); }
161 
162   /// Returns the distance value for the edge.
getDistance()163   unsigned getDistance() const { return Distance; }
164 
165   /// Sets the distance value for the edge.
setDistance(unsigned D)166   void setDistance(unsigned D) { Distance = D; }
167 
168   /// Returns the register associated with the edge.
getReg()169   Register getReg() const { return Pred.getReg(); }
170 
171   /// Returns true if the edge represents anti dependence.
isAntiDep()172   bool isAntiDep() const { return Pred.getKind() == SDep::Kind::Anti; }
173 
174   /// Returns true if the edge represents output dependence.
isOutputDep()175   bool isOutputDep() const { return Pred.getKind() == SDep::Kind::Output; }
176 
177   /// Returns true if the edge represents a dependence that is not data, anti or
178   /// output dependence.
isOrderDep()179   bool isOrderDep() const { return Pred.getKind() == SDep::Kind::Order; }
180 
181   /// Returns true if the edge represents unknown scheduling barrier.
isBarrier()182   bool isBarrier() const { return Pred.isBarrier(); }
183 
184   /// Returns true if the edge represents an artificial dependence.
isArtificial()185   bool isArtificial() const { return Pred.isArtificial(); }
186 
187   /// Tests if this is a Data dependence that is associated with a register.
isAssignedRegDep()188   bool isAssignedRegDep() const { return Pred.isAssignedRegDep(); }
189 
190   /// Returns true for DDG nodes that we ignore when computing the cost
191   /// functions. We ignore the back-edge recurrence in order to avoid unbounded
192   /// recursion in the calculation of the ASAP, ALAP, etc functions.
193   bool ignoreDependence(bool IgnoreAnti) const;
194 
195   /// Returns true if this edge is intended to be used only for validating the
196   /// schedule.
isValidationOnly()197   bool isValidationOnly() const { return IsValidationOnly; }
198 };
199 
200 /// Represents loop-carried dependencies. Because SwingSchedulerDAG doesn't
201 /// assume cycle dependencies as the name suggests, such dependencies must be
202 /// handled separately. After DAG construction is finished, these dependencies
203 /// are added to SwingSchedulerDDG.
204 /// TODO: Also handle output-dependencies introduced by physical registers.
205 struct LoopCarriedEdges {
206   using OrderDep = SmallSetVector<SUnit *, 8>;
207   using OrderDepsType = DenseMap<SUnit *, OrderDep>;
208 
209   OrderDepsType OrderDeps;
210 
getOrderDepOrNullLoopCarriedEdges211   const OrderDep *getOrderDepOrNull(SUnit *Key) const {
212     auto Ite = OrderDeps.find(Key);
213     if (Ite == OrderDeps.end())
214       return nullptr;
215     return &Ite->second;
216   }
217 
218   /// Adds some edges to the original DAG that correspond to loop-carried
219   /// dependencies. Historically, loop-carried edges are represented by using
220   /// non-loop-carried edges in the original DAG. This function appends such
221   /// edges to preserve the previous behavior.
222   void modifySUnits(std::vector<SUnit> &SUnits, const TargetInstrInfo *TII);
223 
224   void dump(SUnit *SU, const TargetRegisterInfo *TRI,
225             const MachineRegisterInfo *MRI) const;
226 };
227 
228 /// This class provides APIs to retrieve edges from/to an SUnit node, with a
229 /// particular focus on loop-carried dependencies. Since SUnit is not designed
230 /// to represent such edges, handling them directly using its APIs has required
231 /// non-trivial logic in the past. This class serves as a wrapper around SUnit,
232 /// offering a simpler interface for managing these dependencies.
233 class SwingSchedulerDDG {
234   using EdgesType = SmallVector<SwingSchedulerDDGEdge, 4>;
235 
236   struct SwingSchedulerDDGEdges {
237     EdgesType Preds;
238     EdgesType Succs;
239   };
240 
241   void initEdges(SUnit *SU);
242 
243   SUnit *EntrySU;
244   SUnit *ExitSU;
245 
246   std::vector<SwingSchedulerDDGEdges> EdgesVec;
247   SwingSchedulerDDGEdges EntrySUEdges;
248   SwingSchedulerDDGEdges ExitSUEdges;
249 
250   /// Edges that are used only when validating the schedule. These edges are
251   /// not considered to drive the optimization heuristics.
252   SmallVector<SwingSchedulerDDGEdge, 8> ValidationOnlyEdges;
253 
254   /// Adds a NON-validation-only edge to the DDG. Assumes to be called only by
255   /// the ctor.
256   void addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge);
257 
258   SwingSchedulerDDGEdges &getEdges(const SUnit *SU);
259   const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const;
260 
261 public:
262   SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU,
263                     const LoopCarriedEdges &LCE);
264 
265   const EdgesType &getInEdges(const SUnit *SU) const;
266 
267   const EdgesType &getOutEdges(const SUnit *SU) const;
268 
269   bool isValidSchedule(const SMSchedule &Schedule) const;
270 };
271 
272 /// This class builds the dependence graph for the instructions in a loop,
273 /// and attempts to schedule the instructions using the SMS algorithm.
274 class SwingSchedulerDAG : public ScheduleDAGInstrs {
275   MachinePipeliner &Pass;
276 
277   std::unique_ptr<SwingSchedulerDDG> DDG;
278 
279   /// The minimum initiation interval between iterations for this schedule.
280   unsigned MII = 0;
281   /// The maximum initiation interval between iterations for this schedule.
282   unsigned MAX_II = 0;
283   /// Set to true if a valid pipelined schedule is found for the loop.
284   bool Scheduled = false;
285   MachineLoop &Loop;
286   LiveIntervals &LIS;
287   const RegisterClassInfo &RegClassInfo;
288   unsigned II_setByPragma = 0;
289   TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;
290 
291   /// A topological ordering of the SUnits, which is needed for changing
292   /// dependences and iterating over the SUnits.
293   ScheduleDAGTopologicalSort Topo;
294 
295   struct NodeInfo {
296     int ASAP = 0;
297     int ALAP = 0;
298     int ZeroLatencyDepth = 0;
299     int ZeroLatencyHeight = 0;
300 
301     NodeInfo() = default;
302   };
303   /// Computed properties for each node in the graph.
304   std::vector<NodeInfo> ScheduleInfo;
305 
306   enum OrderKind { BottomUp = 0, TopDown = 1 };
307   /// Computed node ordering for scheduling.
308   SetVector<SUnit *> NodeOrder;
309 
310   using NodeSetType = SmallVector<NodeSet, 8>;
311   using ValueMapTy = DenseMap<unsigned, unsigned>;
312   using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
313   using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
314 
315   /// Instructions to change when emitting the final schedule.
316   DenseMap<SUnit *, std::pair<Register, int64_t>> InstrChanges;
317 
318   /// We may create a new instruction, so remember it because it
319   /// must be deleted when the pass is finished.
320   DenseMap<MachineInstr*, MachineInstr *> NewMIs;
321 
322   /// Ordered list of DAG postprocessing steps.
323   std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
324 
325   /// Used to compute single-iteration dependencies (i.e., buildSchedGraph).
326   AliasAnalysis *AA;
327 
328   /// Used to compute loop-carried dependencies (i.e.,
329   /// addLoopCarriedDependences).
330   BatchAAResults BAA;
331 
332   /// Helper class to implement Johnson's circuit finding algorithm.
333   class Circuits {
334     std::vector<SUnit> &SUnits;
335     SetVector<SUnit *> Stack;
336     BitVector Blocked;
337     SmallVector<SmallPtrSet<SUnit *, 4>, 10> B;
338     SmallVector<SmallVector<int, 4>, 16> AdjK;
339     // Node to Index from ScheduleDAGTopologicalSort
340     std::vector<int> *Node2Idx;
341     unsigned NumPaths = 0u;
342     static unsigned MaxPaths;
343 
344   public:
Circuits(std::vector<SUnit> & SUs,ScheduleDAGTopologicalSort & Topo)345     Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
346         : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
347       Node2Idx = new std::vector<int>(SUs.size());
348       unsigned Idx = 0;
349       for (const auto &NodeNum : Topo)
350         Node2Idx->at(NodeNum) = Idx++;
351     }
352     Circuits &operator=(const Circuits &other) = delete;
353     Circuits(const Circuits &other) = delete;
~Circuits()354     ~Circuits() { delete Node2Idx; }
355 
356     /// Reset the data structures used in the circuit algorithm.
reset()357     void reset() {
358       Stack.clear();
359       Blocked.reset();
360       B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
361       NumPaths = 0;
362     }
363 
364     void createAdjacencyStructure(SwingSchedulerDAG *DAG);
365     bool circuit(int V, int S, NodeSetType &NodeSets,
366                  const SwingSchedulerDAG *DAG, bool HasBackedge = false);
367     void unblock(int U);
368   };
369 
370   struct CopyToPhiMutation : public ScheduleDAGMutation {
371     void apply(ScheduleDAGInstrs *DAG) override;
372   };
373 
374 public:
SwingSchedulerDAG(MachinePipeliner & P,MachineLoop & L,LiveIntervals & lis,const RegisterClassInfo & rci,unsigned II,TargetInstrInfo::PipelinerLoopInfo * PLI,AliasAnalysis * AA)375   SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
376                     const RegisterClassInfo &rci, unsigned II,
377                     TargetInstrInfo::PipelinerLoopInfo *PLI, AliasAnalysis *AA)
378       : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
379         RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
380         Topo(SUnits, &ExitSU), AA(AA), BAA(*AA) {
381     P.MF->getSubtarget().getSMSMutations(Mutations);
382     if (SwpEnableCopyToPhi)
383       Mutations.push_back(std::make_unique<CopyToPhiMutation>());
384     BAA.enableCrossIterationMode();
385   }
386 
387   void schedule() override;
388   void finishBlock() override;
389 
390   /// Return true if the loop kernel has been scheduled.
hasNewSchedule()391   bool hasNewSchedule() { return Scheduled; }
392 
393   /// Return the earliest time an instruction may be scheduled.
getASAP(SUnit * Node)394   int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
395 
396   /// Return the latest time an instruction my be scheduled.
getALAP(SUnit * Node)397   int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
398 
399   /// The mobility function, which the number of slots in which
400   /// an instruction may be scheduled.
getMOV(SUnit * Node)401   int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
402 
403   /// The depth, in the dependence graph, for a node.
getDepth(SUnit * Node)404   unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
405 
406   /// The maximum unweighted length of a path from an arbitrary node to the
407   /// given node in which each edge has latency 0
getZeroLatencyDepth(SUnit * Node)408   int getZeroLatencyDepth(SUnit *Node) {
409     return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
410   }
411 
412   /// The height, in the dependence graph, for a node.
getHeight(SUnit * Node)413   unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
414 
415   /// The maximum unweighted length of a path from the given node to an
416   /// arbitrary node in which each edge has latency 0
getZeroLatencyHeight(SUnit * Node)417   int getZeroLatencyHeight(SUnit *Node) {
418     return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
419   }
420 
421   bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const;
422 
423   void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
424 
425   void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
426 
427   /// Return the new base register that was stored away for the changed
428   /// instruction.
getInstrBaseReg(SUnit * SU)429   Register getInstrBaseReg(SUnit *SU) const {
430     DenseMap<SUnit *, std::pair<Register, int64_t>>::const_iterator It =
431         InstrChanges.find(SU);
432     if (It != InstrChanges.end())
433       return It->second.first;
434     return Register();
435   }
436 
addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation)437   void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
438     Mutations.push_back(std::move(Mutation));
439   }
440 
classof(const ScheduleDAGInstrs * DAG)441   static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
442 
getDDG()443   const SwingSchedulerDDG *getDDG() const { return DDG.get(); }
444 
445   bool mayOverlapInLaterIter(const MachineInstr *BaseMI,
446                              const MachineInstr *OtherMI) const;
447 
448 private:
449   LoopCarriedEdges addLoopCarriedDependences();
450   void updatePhiDependences();
451   void changeDependences();
452   unsigned calculateResMII();
453   unsigned calculateRecMII(NodeSetType &RecNodeSets);
454   void findCircuits(NodeSetType &NodeSets);
455   void fuseRecs(NodeSetType &NodeSets);
456   void removeDuplicateNodes(NodeSetType &NodeSets);
457   void computeNodeFunctions(NodeSetType &NodeSets);
458   void registerPressureFilter(NodeSetType &NodeSets);
459   void colocateNodeSets(NodeSetType &NodeSets);
460   void checkNodeSets(NodeSetType &NodeSets);
461   void groupRemainingNodes(NodeSetType &NodeSets);
462   void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
463                          SetVector<SUnit *> &NodesAdded);
464   void computeNodeOrder(NodeSetType &NodeSets);
465   void checkValidNodeOrder(const NodeSetType &Circuits) const;
466   bool schedulePipeline(SMSchedule &Schedule);
467   bool computeDelta(const MachineInstr &MI, int &Delta) const;
468   MachineInstr *findDefInLoop(Register Reg);
469   bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
470                              unsigned &OffsetPos, Register &NewBase,
471                              int64_t &NewOffset);
472   void postProcessDAG();
473   /// Set the Minimum Initiation Interval for this schedule attempt.
474   void setMII(unsigned ResMII, unsigned RecMII);
475   /// Set the Maximum Initiation Interval for this schedule attempt.
476   void setMAX_II();
477 };
478 
479 /// A NodeSet contains a set of SUnit DAG nodes with additional information
480 /// that assigns a priority to the set.
481 class NodeSet {
482   SetVector<SUnit *> Nodes;
483   bool HasRecurrence = false;
484   unsigned RecMII = 0;
485   int MaxMOV = 0;
486   unsigned MaxDepth = 0;
487   unsigned Colocate = 0;
488   SUnit *ExceedPressure = nullptr;
489   unsigned Latency = 0;
490 
491 public:
492   using iterator = SetVector<SUnit *>::const_iterator;
493 
494   NodeSet() = default;
NodeSet(iterator S,iterator E,const SwingSchedulerDAG * DAG)495   NodeSet(iterator S, iterator E, const SwingSchedulerDAG *DAG)
496       : Nodes(S, E), HasRecurrence(true) {
497     // Calculate the latency of this node set.
498     // Example to demonstrate the calculation:
499     // Given: N0 -> N1 -> N2 -> N0
500     // Edges:
501     // (N0 -> N1, 3)
502     // (N0 -> N1, 5)
503     // (N1 -> N2, 2)
504     // (N2 -> N0, 1)
505     // The total latency which is a lower bound of the recurrence MII is the
506     // longest path from N0 back to N0 given only the edges of this node set.
507     // In this example, the latency is: 5 + 2 + 1 = 8.
508     //
509     // Hold a map from each SUnit in the circle to the maximum distance from the
510     // source node by only considering the nodes.
511     const SwingSchedulerDDG *DDG = DAG->getDDG();
512     DenseMap<SUnit *, unsigned> SUnitToDistance;
513     for (auto *Node : Nodes)
514       SUnitToDistance[Node] = 0;
515 
516     for (unsigned I = 1, E = Nodes.size(); I <= E; ++I) {
517       SUnit *U = Nodes[I - 1];
518       SUnit *V = Nodes[I % Nodes.size()];
519       for (const SwingSchedulerDDGEdge &Succ : DDG->getOutEdges(U)) {
520         SUnit *SuccSUnit = Succ.getDst();
521         if (V != SuccSUnit)
522           continue;
523         unsigned &DU = SUnitToDistance[U];
524         unsigned &DV = SUnitToDistance[V];
525         if (DU + Succ.getLatency() > DV)
526           DV = DU + Succ.getLatency();
527       }
528     }
529     // Handle a back-edge in loop carried dependencies
530     SUnit *FirstNode = Nodes[0];
531     SUnit *LastNode = Nodes[Nodes.size() - 1];
532 
533     for (auto &PI : DDG->getInEdges(LastNode)) {
534       // If we have an order dep that is potentially loop carried then a
535       // back-edge exists between the last node and the first node that isn't
536       // modeled in the DAG. Handle it manually by adding 1 to the distance of
537       // the last node.
538       if (PI.getSrc() != FirstNode || !PI.isOrderDep() ||
539           !DAG->isLoopCarriedDep(PI))
540         continue;
541       unsigned &First = SUnitToDistance[FirstNode];
542       unsigned Last = SUnitToDistance[LastNode];
543       First = std::max(First, Last + 1);
544     }
545 
546     // The latency is the distance from the source node to itself.
547     Latency = SUnitToDistance[Nodes.front()];
548   }
549 
insert(SUnit * SU)550   bool insert(SUnit *SU) { return Nodes.insert(SU); }
551 
insert(iterator S,iterator E)552   void insert(iterator S, iterator E) { Nodes.insert(S, E); }
553 
remove_if(UnaryPredicate P)554   template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
555     return Nodes.remove_if(P);
556   }
557 
count(SUnit * SU)558   unsigned count(SUnit *SU) const { return Nodes.count(SU); }
559 
hasRecurrence()560   bool hasRecurrence() { return HasRecurrence; };
561 
size()562   unsigned size() const { return Nodes.size(); }
563 
empty()564   bool empty() const { return Nodes.empty(); }
565 
getNode(unsigned i)566   SUnit *getNode(unsigned i) const { return Nodes[i]; };
567 
setRecMII(unsigned mii)568   void setRecMII(unsigned mii) { RecMII = mii; };
569 
setColocate(unsigned c)570   void setColocate(unsigned c) { Colocate = c; };
571 
setExceedPressure(SUnit * SU)572   void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
573 
isExceedSU(SUnit * SU)574   bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
575 
compareRecMII(NodeSet & RHS)576   int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
577 
getRecMII()578   int getRecMII() { return RecMII; }
579 
580   /// Summarize node functions for the entire node set.
computeNodeSetInfo(SwingSchedulerDAG * SSD)581   void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
582     for (SUnit *SU : *this) {
583       MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
584       MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
585     }
586   }
587 
getLatency()588   unsigned getLatency() { return Latency; }
589 
getMaxDepth()590   unsigned getMaxDepth() { return MaxDepth; }
591 
clear()592   void clear() {
593     Nodes.clear();
594     RecMII = 0;
595     HasRecurrence = false;
596     MaxMOV = 0;
597     MaxDepth = 0;
598     Colocate = 0;
599     ExceedPressure = nullptr;
600   }
601 
602   operator SetVector<SUnit *> &() { return Nodes; }
603 
604   /// Sort the node sets by importance. First, rank them by recurrence MII,
605   /// then by mobility (least mobile done first), and finally by depth.
606   /// Each node set may contain a colocate value which is used as the first
607   /// tie breaker, if it's set.
608   bool operator>(const NodeSet &RHS) const {
609     if (RecMII == RHS.RecMII) {
610       if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
611         return Colocate < RHS.Colocate;
612       if (MaxMOV == RHS.MaxMOV)
613         return MaxDepth > RHS.MaxDepth;
614       return MaxMOV < RHS.MaxMOV;
615     }
616     return RecMII > RHS.RecMII;
617   }
618 
619   bool operator==(const NodeSet &RHS) const {
620     return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
621            MaxDepth == RHS.MaxDepth;
622   }
623 
624   bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
625 
begin()626   iterator begin() { return Nodes.begin(); }
end()627   iterator end() { return Nodes.end(); }
628   void print(raw_ostream &os) const;
629 
630 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
631   LLVM_DUMP_METHOD void dump() const;
632 #endif
633 };
634 
635 // 16 was selected based on the number of ProcResource kinds for all
636 // existing Subtargets, so that SmallVector don't need to resize too often.
637 static const int DefaultProcResSize = 16;
638 
639 class ResourceManager {
640 private:
641   const MCSubtargetInfo *STI;
642   const MCSchedModel &SM;
643   const TargetSubtargetInfo *ST;
644   const TargetInstrInfo *TII;
645   ScheduleDAGInstrs *DAG;
646   const bool UseDFA;
647   /// DFA resources for each slot
648   llvm::SmallVector<std::unique_ptr<DFAPacketizer>> DFAResources;
649   /// Modulo Reservation Table. When a resource with ID R is consumed in cycle
650   /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F)
651   llvm::SmallVector<llvm::SmallVector<uint64_t, DefaultProcResSize>> MRT;
652   /// The number of scheduled micro operations for each slot. Micro operations
653   /// are assumed to be scheduled one per cycle, starting with the cycle in
654   /// which the instruction is scheduled.
655   llvm::SmallVector<int> NumScheduledMops;
656   /// Each processor resource is associated with a so-called processor resource
657   /// mask. This vector allows to correlate processor resource IDs with
658   /// processor resource masks. There is exactly one element per each processor
659   /// resource declared by the scheduling model.
660   llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks;
661   int InitiationInterval = 0;
662   /// The number of micro operations that can be scheduled at a cycle.
663   int IssueWidth;
664 
665   int calculateResMIIDFA() const;
666   /// Check if MRT is overbooked
667   bool isOverbooked() const;
668   /// Reserve resources on MRT
669   void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
670   /// Unreserve resources on MRT
671   void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
672 
673   /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor.
674   /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C,
675   /// II).
positiveModulo(int Dividend,int Divisor)676   int positiveModulo(int Dividend, int Divisor) const {
677     assert(Divisor > 0);
678     int R = Dividend % Divisor;
679     if (R < 0)
680       R += Divisor;
681     return R;
682   }
683 
684 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
685   LLVM_DUMP_METHOD void dumpMRT() const;
686 #endif
687 
688 public:
ResourceManager(const TargetSubtargetInfo * ST,ScheduleDAGInstrs * DAG)689   ResourceManager(const TargetSubtargetInfo *ST, ScheduleDAGInstrs *DAG)
690       : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()),
691         DAG(DAG), UseDFA(ST->useDFAforSMS()),
692         ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
693         IssueWidth(SM.IssueWidth) {
694     initProcResourceVectors(SM, ProcResourceMasks);
695     if (IssueWidth <= 0)
696       // If IssueWidth is not specified, set a sufficiently large value
697       IssueWidth = 100;
698     if (SwpForceIssueWidth > 0)
699       IssueWidth = SwpForceIssueWidth;
700   }
701 
702   void initProcResourceVectors(const MCSchedModel &SM,
703                                SmallVectorImpl<uint64_t> &Masks);
704 
705   /// Check if the resources occupied by a machine instruction are available
706   /// in the current state.
707   bool canReserveResources(SUnit &SU, int Cycle);
708 
709   /// Reserve the resources occupied by a machine instruction and change the
710   /// current state to reflect that change.
711   void reserveResources(SUnit &SU, int Cycle);
712 
713   int calculateResMII() const;
714 
715   /// Initialize resources with the initiation interval II.
716   void init(int II);
717 };
718 
719 /// This class represents the scheduled code.  The main data structure is a
720 /// map from scheduled cycle to instructions.  During scheduling, the
721 /// data structure explicitly represents all stages/iterations.   When
722 /// the algorithm finshes, the schedule is collapsed into a single stage,
723 /// which represents instructions from different loop iterations.
724 ///
725 /// The SMS algorithm allows negative values for cycles, so the first cycle
726 /// in the schedule is the smallest cycle value.
727 class SMSchedule {
728 private:
729   /// Map from execution cycle to instructions.
730   DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
731 
732   /// Map from instruction to execution cycle.
733   std::map<SUnit *, int> InstrToCycle;
734 
735   /// Keep track of the first cycle value in the schedule.  It starts
736   /// as zero, but the algorithm allows negative values.
737   int FirstCycle = 0;
738 
739   /// Keep track of the last cycle value in the schedule.
740   int LastCycle = 0;
741 
742   /// The initiation interval (II) for the schedule.
743   int InitiationInterval = 0;
744 
745   /// Target machine information.
746   const TargetSubtargetInfo &ST;
747 
748   /// Virtual register information.
749   MachineRegisterInfo &MRI;
750 
751   ResourceManager ProcItinResources;
752 
753 public:
SMSchedule(MachineFunction * mf,SwingSchedulerDAG * DAG)754   SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG)
755       : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
756         ProcItinResources(&ST, DAG) {}
757 
reset()758   void reset() {
759     ScheduledInstrs.clear();
760     InstrToCycle.clear();
761     FirstCycle = 0;
762     LastCycle = 0;
763     InitiationInterval = 0;
764   }
765 
766   /// Set the initiation interval for this schedule.
setInitiationInterval(int ii)767   void setInitiationInterval(int ii) {
768     InitiationInterval = ii;
769     ProcItinResources.init(ii);
770   }
771 
772   /// Return the initiation interval for this schedule.
getInitiationInterval()773   int getInitiationInterval() const { return InitiationInterval; }
774 
775   /// Return the first cycle in the completed schedule.  This
776   /// can be a negative value.
getFirstCycle()777   int getFirstCycle() const { return FirstCycle; }
778 
779   /// Return the last cycle in the finalized schedule.
getFinalCycle()780   int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
781 
782   /// Return the cycle of the earliest scheduled instruction in the dependence
783   /// chain.
784   int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep,
785                            const SwingSchedulerDDG *DDG);
786 
787   /// Return the cycle of the latest scheduled instruction in the dependence
788   /// chain.
789   int latestCycleInChain(const SwingSchedulerDDGEdge &Dep,
790                          const SwingSchedulerDDG *DDG);
791 
792   void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II,
793                     SwingSchedulerDAG *DAG);
794   bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
795 
796   /// Iterators for the cycle to instruction map.
797   using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
798   using const_sched_iterator =
799       DenseMap<int, std::deque<SUnit *>>::const_iterator;
800 
801   /// Return true if the instruction is scheduled at the specified stage.
isScheduledAtStage(SUnit * SU,unsigned StageNum)802   bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
803     return (stageScheduled(SU) == (int)StageNum);
804   }
805 
806   /// Return the stage for a scheduled instruction.  Return -1 if
807   /// the instruction has not been scheduled.
stageScheduled(SUnit * SU)808   int stageScheduled(SUnit *SU) const {
809     std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
810     if (it == InstrToCycle.end())
811       return -1;
812     return (it->second - FirstCycle) / InitiationInterval;
813   }
814 
815   /// Return the cycle for a scheduled instruction. This function normalizes
816   /// the first cycle to be 0.
cycleScheduled(SUnit * SU)817   unsigned cycleScheduled(SUnit *SU) const {
818     std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
819     assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
820     return (it->second - FirstCycle) % InitiationInterval;
821   }
822 
823   /// Return the maximum stage count needed for this schedule.
getMaxStageCount()824   unsigned getMaxStageCount() {
825     return (LastCycle - FirstCycle) / InitiationInterval;
826   }
827 
828   /// Return the instructions that are scheduled at the specified cycle.
getInstructions(int cycle)829   std::deque<SUnit *> &getInstructions(int cycle) {
830     return ScheduledInstrs[cycle];
831   }
832 
833   SmallSet<SUnit *, 8>
834   computeUnpipelineableNodes(SwingSchedulerDAG *SSD,
835                              TargetInstrInfo::PipelinerLoopInfo *PLI);
836 
837   std::deque<SUnit *>
838   reorderInstructions(const SwingSchedulerDAG *SSD,
839                       const std::deque<SUnit *> &Instrs) const;
840 
841   bool
842   normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD,
843                                     TargetInstrInfo::PipelinerLoopInfo *PLI);
844   bool isValidSchedule(SwingSchedulerDAG *SSD);
845   void finalizeSchedule(SwingSchedulerDAG *SSD);
846   void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
847                        std::deque<SUnit *> &Insts) const;
848   bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const;
849   bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def,
850                              MachineOperand &MO) const;
851 
852   bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU,
853                                             const SwingSchedulerDDG *DDG) const;
854   void print(raw_ostream &os) const;
855   void dump() const;
856 };
857 
858 } // end namespace llvm
859 
860 #endif // LLVM_CODEGEN_MACHINEPIPELINER_H
861