xref: /freebsd/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file Implements the ScheduleDAGInstrs class, which implements scheduling
10 /// for a MachineInstr-based dependency graph.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
15 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
16 
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PointerIntPair.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/SparseMultiSet.h"
21 #include "llvm/ADT/identity.h"
22 #include "llvm/CodeGen/LiveRegUnits.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/ScheduleDAG.h"
25 #include "llvm/CodeGen/TargetRegisterInfo.h"
26 #include "llvm/CodeGen/TargetSchedule.h"
27 #include "llvm/MC/LaneBitmask.h"
28 #include <cassert>
29 #include <cstdint>
30 #include <list>
31 #include <string>
32 #include <utility>
33 #include <vector>
34 
35 namespace llvm {
36 
37   class AAResults;
38   class LiveIntervals;
39   class MachineFrameInfo;
40   class MachineFunction;
41   class MachineInstr;
42   class MachineLoopInfo;
43   class MachineOperand;
44   struct MCSchedClassDesc;
45   class PressureDiffs;
46   class PseudoSourceValue;
47   class RegPressureTracker;
48   class UndefValue;
49   class Value;
50 
51   /// An individual mapping from virtual register number to SUnit.
52   struct VReg2SUnit {
53     unsigned VirtReg;
54     LaneBitmask LaneMask;
55     SUnit *SU;
56 
VReg2SUnitVReg2SUnit57     VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU)
58       : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {}
59 
getSparseSetIndexVReg2SUnit60     unsigned getSparseSetIndex() const {
61       return Register::virtReg2Index(VirtReg);
62     }
63   };
64 
65   /// Mapping from virtual register to SUnit including an operand index.
66   struct VReg2SUnitOperIdx : public VReg2SUnit {
67     unsigned OperandIndex;
68 
VReg2SUnitOperIdxVReg2SUnitOperIdx69     VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask,
70                       unsigned OperandIndex, SUnit *SU)
71       : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {}
72   };
73 
74   /// Record a physical register access.
75   /// For non-data-dependent uses, OpIdx == -1.
76   struct PhysRegSUOper {
77     SUnit *SU;
78     int OpIdx;
79     unsigned RegUnit;
80 
PhysRegSUOperPhysRegSUOper81     PhysRegSUOper(SUnit *su, int op, unsigned R)
82         : SU(su), OpIdx(op), RegUnit(R) {}
83 
getSparseSetIndexPhysRegSUOper84     unsigned getSparseSetIndex() const { return RegUnit; }
85   };
86 
87   /// Use a SparseMultiSet to track physical registers. Storage is only
88   /// allocated once for the pass. It can be cleared in constant time and reused
89   /// without any frees.
90   using RegUnit2SUnitsMap =
91       SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>;
92 
93   /// Track local uses of virtual registers. These uses are gathered by the DAG
94   /// builder and may be consulted by the scheduler to avoid iterating an entire
95   /// vreg use list.
96   using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>;
97 
98   using VReg2SUnitOperIdxMultiMap =
99       SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>;
100 
101   using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;
102 
103   struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> {
UnderlyingObjectUnderlyingObject104     UnderlyingObject(ValueType V, bool MayAlias)
105         : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {}
106 
getValueUnderlyingObject107     ValueType getValue() const { return getPointer(); }
mayAliasUnderlyingObject108     bool mayAlias() const { return getInt(); }
109   };
110 
111   using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>;
112 
113   /// A ScheduleDAG for scheduling lists of MachineInstr.
114   class ScheduleDAGInstrs : public ScheduleDAG {
115   protected:
116     const MachineLoopInfo *MLI = nullptr;
117     const MachineFrameInfo &MFI;
118 
119     /// TargetSchedModel provides an interface to the machine model.
120     TargetSchedModel SchedModel;
121 
122     /// True if the DAG builder should remove kill flags (in preparation for
123     /// rescheduling).
124     bool RemoveKillFlags;
125 
126     /// The standard DAG builder does not normally include terminators as DAG
127     /// nodes because it does not create the necessary dependencies to prevent
128     /// reordering. A specialized scheduler can override
129     /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate
130     /// it has taken responsibility for scheduling the terminator correctly.
131     bool CanHandleTerminators = false;
132 
133     /// Whether lane masks should get tracked.
134     bool TrackLaneMasks = false;
135 
136     // State specific to the current scheduling region.
137     // ------------------------------------------------
138 
139     /// The block in which to insert instructions
140     MachineBasicBlock *BB = nullptr;
141 
142     /// The beginning of the range to be scheduled.
143     MachineBasicBlock::iterator RegionBegin;
144 
145     /// The end of the range to be scheduled.
146     MachineBasicBlock::iterator RegionEnd;
147 
148     /// Instructions in this region (distance(RegionBegin, RegionEnd)).
149     unsigned NumRegionInstrs = 0;
150 
151     /// After calling BuildSchedGraph, each machine instruction in the current
152     /// scheduling region is mapped to an SUnit.
153     DenseMap<MachineInstr*, SUnit*> MISUnitMap;
154 
155     // State internal to DAG building.
156     // -------------------------------
157 
158     /// Defs, Uses - Remember where defs and uses of each register are as we
159     /// iterate upward through the instructions. This is allocated here instead
160     /// of inside BuildSchedGraph to avoid the need for it to be initialized and
161     /// destructed for each block.
162     RegUnit2SUnitsMap Defs;
163     RegUnit2SUnitsMap Uses;
164 
165     /// Tracks the last instruction(s) in this region defining each virtual
166     /// register. There may be multiple current definitions for a register with
167     /// disjunct lanemasks.
168     VReg2SUnitMultiMap CurrentVRegDefs;
169     /// Tracks the last instructions in this region using each virtual register.
170     VReg2SUnitOperIdxMultiMap CurrentVRegUses;
171 
172     AAResults *AAForDep = nullptr;
173 
174     /// Remember a generic side-effecting instruction as we proceed.
175     /// No other SU ever gets scheduled around it (except in the special
176     /// case of a huge region that gets reduced).
177     SUnit *BarrierChain = nullptr;
178 
179   public:
180     /// A list of SUnits, used in Value2SUsMap, during DAG construction.
181     /// Note: to gain speed it might be worth investigating an optimized
182     /// implementation of this data structure, such as a singly linked list
183     /// with a memory pool (SmallVector was tried but slow and SparseSet is not
184     /// applicable).
185     using SUList = std::list<SUnit *>;
186 
187     /// The direction that should be used to dump the scheduled Sequence.
188     enum DumpDirection {
189       TopDown,
190       BottomUp,
191       Bidirectional,
192       NotSet,
193     };
194 
setDumpDirection(DumpDirection D)195     void setDumpDirection(DumpDirection D) { DumpDir = D; }
196 
197   protected:
198     DumpDirection DumpDir = NotSet;
199 
200     /// A map from ValueType to SUList, used during DAG construction, as
201     /// a means of remembering which SUs depend on which memory locations.
202     class Value2SUsMap;
203 
204     /// Reduces maps in FIFO order, by N SUs. This is better than turning
205     /// every Nth memory SU into BarrierChain in buildSchedGraph(), since
206     /// it avoids unnecessary edges between seen SUs above the new BarrierChain,
207     /// and those below it.
208     void reduceHugeMemNodeMaps(Value2SUsMap &stores,
209                                Value2SUsMap &loads, unsigned N);
210 
211     /// Adds a chain edge between SUa and SUb, but only if both
212     /// AAResults and Target fail to deny the dependency.
213     void addChainDependency(SUnit *SUa, SUnit *SUb,
214                             unsigned Latency = 0);
215 
216     /// Adds dependencies as needed from all SUs in list to SU.
addChainDependencies(SUnit * SU,SUList & SUs,unsigned Latency)217     void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) {
218       for (SUnit *Entry : SUs)
219         addChainDependency(SU, Entry, Latency);
220     }
221 
222     /// Adds dependencies as needed from all SUs in map, to SU.
223     void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap);
224 
225     /// Adds dependencies as needed to SU, from all SUs mapped to V.
226     void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
227                               ValueType V);
228 
229     /// Adds barrier chain edges from all SUs in map, and then clear the map.
230     /// This is equivalent to insertBarrierChain(), but optimized for the common
231     /// case where the new BarrierChain (a global memory object) has a higher
232     /// NodeNum than all SUs in map. It is assumed BarrierChain has been set
233     /// before calling this.
234     void addBarrierChain(Value2SUsMap &map);
235 
236     /// Inserts a barrier chain in a huge region, far below current SU.
237     /// Adds barrier chain edges from all SUs in map with higher NodeNums than
238     /// this new BarrierChain, and remove them from map. It is assumed
239     /// BarrierChain has been set before calling this.
240     void insertBarrierChain(Value2SUsMap &map);
241 
242     /// For an unanalyzable memory access, this Value is used in maps.
243     UndefValue *UnknownValue;
244 
245 
246     /// Topo - A topological ordering for SUnits which permits fast IsReachable
247     /// and similar queries.
248     ScheduleDAGTopologicalSort Topo;
249 
250     using DbgValueVector =
251         std::vector<std::pair<MachineInstr *, MachineInstr *>>;
252     /// Remember instruction that precedes DBG_VALUE.
253     /// These are generated by buildSchedGraph but persist so they can be
254     /// referenced when emitting the final schedule.
255     DbgValueVector DbgValues;
256     MachineInstr *FirstDbgValue = nullptr;
257 
258     /// Set of live physical registers for updating kill flags.
259     LiveRegUnits LiveRegs;
260 
261   public:
262     explicit ScheduleDAGInstrs(MachineFunction &mf,
263                                const MachineLoopInfo *mli,
264                                bool RemoveKillFlags = false);
265 
266     ~ScheduleDAGInstrs() override = default;
267 
268     /// Gets the machine model for instruction scheduling.
getSchedModel()269     const TargetSchedModel *getSchedModel() const { return &SchedModel; }
270 
271     /// Resolves and cache a resolved scheduling class for an SUnit.
getSchedClass(SUnit * SU)272     const MCSchedClassDesc *getSchedClass(SUnit *SU) const {
273       if (!SU->SchedClass && SchedModel.hasInstrSchedModel())
274         SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr());
275       return SU->SchedClass;
276     }
277 
278     /// IsReachable - Checks if SU is reachable from TargetSU.
IsReachable(SUnit * SU,SUnit * TargetSU)279     bool IsReachable(SUnit *SU, SUnit *TargetSU) {
280       return Topo.IsReachable(SU, TargetSU);
281     }
282 
283     /// Returns an iterator to the top of the current scheduling region.
begin()284     MachineBasicBlock::iterator begin() const { return RegionBegin; }
285 
286     /// Returns an iterator to the bottom of the current scheduling region.
end()287     MachineBasicBlock::iterator end() const { return RegionEnd; }
288 
289     /// Creates a new SUnit and return a ptr to it.
290     SUnit *newSUnit(MachineInstr *MI);
291 
292     /// Returns an existing SUnit for this MI, or nullptr.
293     SUnit *getSUnit(MachineInstr *MI) const;
294 
295     /// If this method returns true, handling of the scheduling regions
296     /// themselves (in case of a scheduling boundary in MBB) will be done
297     /// beginning with the topmost region of MBB.
doMBBSchedRegionsTopDown()298     virtual bool doMBBSchedRegionsTopDown() const { return false; }
299 
300     /// Prepares to perform scheduling in the given block.
301     virtual void startBlock(MachineBasicBlock *BB);
302 
303     /// Cleans up after scheduling in the given block.
304     virtual void finishBlock();
305 
306     /// Initialize the DAG and common scheduler state for a new
307     /// scheduling region. This does not actually create the DAG, only clears
308     /// it. The scheduling driver may call BuildSchedGraph multiple times per
309     /// scheduling region.
310     virtual void enterRegion(MachineBasicBlock *bb,
311                              MachineBasicBlock::iterator begin,
312                              MachineBasicBlock::iterator end,
313                              unsigned regioninstrs);
314 
315     /// Called when the scheduler has finished scheduling the current region.
316     virtual void exitRegion();
317 
318     /// Builds SUnits for the current region.
319     /// If \p RPTracker is non-null, compute register pressure as a side effect.
320     /// The DAG builder is an efficient place to do it because it already visits
321     /// operands.
322     void buildSchedGraph(AAResults *AA,
323                          RegPressureTracker *RPTracker = nullptr,
324                          PressureDiffs *PDiffs = nullptr,
325                          LiveIntervals *LIS = nullptr,
326                          bool TrackLaneMasks = false);
327 
328     /// Adds dependencies from instructions in the current list of
329     /// instructions being scheduled to scheduling barrier. We want to make sure
330     /// instructions which define registers that are either used by the
331     /// terminator or are live-out are properly scheduled. This is especially
332     /// important when the definition latency of the return value(s) are too
333     /// high to be hidden by the branch or when the liveout registers used by
334     /// instructions in the fallthrough block.
335     void addSchedBarrierDeps();
336 
337     /// Orders nodes according to selected style.
338     ///
339     /// Typically, a scheduling algorithm will implement schedule() without
340     /// overriding enterRegion() or exitRegion().
341     virtual void schedule() = 0;
342 
343     /// Allow targets to perform final scheduling actions at the level of the
344     /// whole MachineFunction. By default does nothing.
finalizeSchedule()345     virtual void finalizeSchedule() {}
346 
347     void dumpNode(const SUnit &SU) const override;
348     void dump() const override;
349 
350     /// Returns a label for a DAG node that points to an instruction.
351     std::string getGraphNodeLabel(const SUnit *SU) const override;
352 
353     /// Returns a label for the region of code covered by the DAG.
354     std::string getDAGName() const override;
355 
356     /// Fixes register kill flags that scheduling has made invalid.
357     void fixupKills(MachineBasicBlock &MBB);
358 
359     /// True if an edge can be added from PredSU to SuccSU without creating
360     /// a cycle.
361     bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
362 
363     /// Add a DAG edge to the given SU with the given predecessor
364     /// dependence data.
365     ///
366     /// \returns true if the edge may be added without creating a cycle OR if an
367     /// equivalent edge already existed (false indicates failure).
368     bool addEdge(SUnit *SuccSU, const SDep &PredDep);
369 
370   protected:
371     void initSUnits();
372     void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
373     void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
374     void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
375     void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
376 
377     /// Returns a mask for which lanes get read/written by the given (register)
378     /// machine operand.
379     LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
380 
381     /// Returns true if the def register in \p MO has no uses.
382     bool deadDefHasNoUse(const MachineOperand &MO);
383   };
384 
385   /// Creates a new SUnit and return a ptr to it.
newSUnit(MachineInstr * MI)386   inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) {
387 #ifndef NDEBUG
388     const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0];
389 #endif
390     SUnits.emplace_back(MI, (unsigned)SUnits.size());
391     assert((Addr == nullptr || Addr == &SUnits[0]) &&
392            "SUnits std::vector reallocated on the fly!");
393     return &SUnits.back();
394   }
395 
396   /// Returns an existing SUnit for this MI, or nullptr.
getSUnit(MachineInstr * MI)397   inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const {
398     return MISUnitMap.lookup(MI);
399   }
400 
401 } // end namespace llvm
402 
403 #endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
404