xref: /freebsd/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file Implements the ScheduleDAGInstrs class, which implements scheduling
10 /// for a MachineInstr-based dependency graph.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
15 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
16 
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PointerIntPair.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/SparseMultiSet.h"
21 #include "llvm/ADT/identity.h"
22 #include "llvm/Analysis/AliasAnalysis.h"
23 #include "llvm/CodeGen/LiveRegUnits.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/ScheduleDAG.h"
26 #include "llvm/CodeGen/TargetRegisterInfo.h"
27 #include "llvm/CodeGen/TargetSchedule.h"
28 #include "llvm/MC/LaneBitmask.h"
29 #include "llvm/Support/Compiler.h"
30 #include <cassert>
31 #include <cstdint>
32 #include <list>
33 #include <string>
34 #include <utility>
35 #include <vector>
36 
37 namespace llvm {
38 
39   class AAResults;
40   class LiveIntervals;
41   class MachineFrameInfo;
42   class MachineFunction;
43   class MachineInstr;
44   class MachineLoopInfo;
45   class MachineOperand;
46   struct MCSchedClassDesc;
47   class PressureDiffs;
48   class PseudoSourceValue;
49   class RegPressureTracker;
50   class UndefValue;
51   class Value;
52 
53   /// An individual mapping from virtual register number to SUnit.
54   struct VReg2SUnit {
55     Register VirtReg;
56     LaneBitmask LaneMask;
57     SUnit *SU;
58 
VReg2SUnitVReg2SUnit59     VReg2SUnit(Register VReg, LaneBitmask LaneMask, SUnit *SU)
60       : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {}
61 
getSparseSetIndexVReg2SUnit62     unsigned getSparseSetIndex() const {
63       return VirtReg.virtRegIndex();
64     }
65   };
66 
67   /// Mapping from virtual register to SUnit including an operand index.
68   struct VReg2SUnitOperIdx : public VReg2SUnit {
69     unsigned OperandIndex;
70 
VReg2SUnitOperIdxVReg2SUnitOperIdx71     VReg2SUnitOperIdx(Register VReg, LaneBitmask LaneMask,
72                       unsigned OperandIndex, SUnit *SU)
73       : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {}
74   };
75 
76   /// Record a physical register access.
77   /// For non-data-dependent uses, OpIdx == -1.
78   struct PhysRegSUOper {
79     SUnit *SU;
80     int OpIdx;
81     unsigned RegUnit;
82 
PhysRegSUOperPhysRegSUOper83     PhysRegSUOper(SUnit *su, int op, unsigned R)
84         : SU(su), OpIdx(op), RegUnit(R) {}
85 
getSparseSetIndexPhysRegSUOper86     unsigned getSparseSetIndex() const { return RegUnit; }
87   };
88 
89   /// Use a SparseMultiSet to track physical registers. Storage is only
90   /// allocated once for the pass. It can be cleared in constant time and reused
91   /// without any frees.
92   using RegUnit2SUnitsMap =
93       SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>;
94 
95   /// Track local uses of virtual registers. These uses are gathered by the DAG
96   /// builder and may be consulted by the scheduler to avoid iterating an entire
97   /// vreg use list.
98   using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>;
99 
100   using VReg2SUnitOperIdxMultiMap =
101       SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>;
102 
103   using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;
104 
105   struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> {
UnderlyingObjectUnderlyingObject106     UnderlyingObject(ValueType V, bool MayAlias)
107         : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {}
108 
getValueUnderlyingObject109     ValueType getValue() const { return getPointer(); }
mayAliasUnderlyingObject110     bool mayAlias() const { return getInt(); }
111   };
112 
113   using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>;
114 
115   /// A ScheduleDAG for scheduling lists of MachineInstr.
116   class LLVM_ABI ScheduleDAGInstrs : public ScheduleDAG {
117   protected:
118     const MachineLoopInfo *MLI = nullptr;
119     const MachineFrameInfo &MFI;
120 
121     /// TargetSchedModel provides an interface to the machine model.
122     TargetSchedModel SchedModel;
123 
124     /// True if the DAG builder should remove kill flags (in preparation for
125     /// rescheduling).
126     bool RemoveKillFlags;
127 
128     /// True if regions with a single MI should be scheduled.
129     bool ScheduleSingleMIRegions = false;
130 
131     /// The standard DAG builder does not normally include terminators as DAG
132     /// nodes because it does not create the necessary dependencies to prevent
133     /// reordering. A specialized scheduler can override
134     /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate
135     /// it has taken responsibility for scheduling the terminator correctly.
136     bool CanHandleTerminators = false;
137 
138     /// Whether lane masks should get tracked.
139     bool TrackLaneMasks = false;
140 
141     // State specific to the current scheduling region.
142     // ------------------------------------------------
143 
144     /// The block in which to insert instructions
145     MachineBasicBlock *BB = nullptr;
146 
147     /// The beginning of the range to be scheduled.
148     MachineBasicBlock::iterator RegionBegin;
149 
150     /// The end of the range to be scheduled.
151     MachineBasicBlock::iterator RegionEnd;
152 
153     /// Instructions in this region (distance(RegionBegin, RegionEnd)).
154     unsigned NumRegionInstrs = 0;
155 
156     /// After calling BuildSchedGraph, each machine instruction in the current
157     /// scheduling region is mapped to an SUnit.
158     DenseMap<MachineInstr*, SUnit*> MISUnitMap;
159 
160     // State internal to DAG building.
161     // -------------------------------
162 
163     /// Defs, Uses - Remember where defs and uses of each register are as we
164     /// iterate upward through the instructions. This is allocated here instead
165     /// of inside BuildSchedGraph to avoid the need for it to be initialized and
166     /// destructed for each block.
167     RegUnit2SUnitsMap Defs;
168     RegUnit2SUnitsMap Uses;
169 
170     /// Tracks the last instruction(s) in this region defining each virtual
171     /// register. There may be multiple current definitions for a register with
172     /// disjunct lanemasks.
173     VReg2SUnitMultiMap CurrentVRegDefs;
174     /// Tracks the last instructions in this region using each virtual register.
175     VReg2SUnitOperIdxMultiMap CurrentVRegUses;
176 
177     mutable std::optional<BatchAAResults> AAForDep;
178 
179     /// Remember a generic side-effecting instruction as we proceed.
180     /// No other SU ever gets scheduled around it (except in the special
181     /// case of a huge region that gets reduced).
182     SUnit *BarrierChain = nullptr;
183 
184     SmallVector<ClusterInfo> Clusters;
185 
186   public:
187     /// A list of SUnits, used in Value2SUsMap, during DAG construction.
188     /// Note: to gain speed it might be worth investigating an optimized
189     /// implementation of this data structure, such as a singly linked list
190     /// with a memory pool (SmallVector was tried but slow and SparseSet is not
191     /// applicable).
192     using SUList = std::list<SUnit *>;
193 
194     /// The direction that should be used to dump the scheduled Sequence.
195     enum DumpDirection {
196       TopDown,
197       BottomUp,
198       Bidirectional,
199       NotSet,
200     };
201 
setDumpDirection(DumpDirection D)202     void setDumpDirection(DumpDirection D) { DumpDir = D; }
203 
204   protected:
205     DumpDirection DumpDir = NotSet;
206 
207     /// A map from ValueType to SUList, used during DAG construction, as
208     /// a means of remembering which SUs depend on which memory locations.
209     class Value2SUsMap;
210 
211     /// Returns a (possibly null) pointer to the current BatchAAResults.
getAAForDep()212     BatchAAResults *getAAForDep() const {
213       if (AAForDep.has_value())
214         return &AAForDep.value();
215       return nullptr;
216     }
217 
218     /// Reduces maps in FIFO order, by N SUs. This is better than turning
219     /// every Nth memory SU into BarrierChain in buildSchedGraph(), since
220     /// it avoids unnecessary edges between seen SUs above the new BarrierChain,
221     /// and those below it.
222     void reduceHugeMemNodeMaps(Value2SUsMap &stores,
223                                Value2SUsMap &loads, unsigned N);
224 
225     /// Adds a chain edge between SUa and SUb, but only if both
226     /// AAResults and Target fail to deny the dependency.
227     void addChainDependency(SUnit *SUa, SUnit *SUb,
228                             unsigned Latency = 0);
229 
230     /// Adds dependencies as needed from all SUs in list to SU.
addChainDependencies(SUnit * SU,SUList & SUs,unsigned Latency)231     void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) {
232       for (SUnit *Entry : SUs)
233         addChainDependency(SU, Entry, Latency);
234     }
235 
236     /// Adds dependencies as needed from all SUs in map, to SU.
237     void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap);
238 
239     /// Adds dependencies as needed to SU, from all SUs mapped to V.
240     void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
241                               ValueType V);
242 
243     /// Adds barrier chain edges from all SUs in map, and then clear the map.
244     /// This is equivalent to insertBarrierChain(), but optimized for the common
245     /// case where the new BarrierChain (a global memory object) has a higher
246     /// NodeNum than all SUs in map. It is assumed BarrierChain has been set
247     /// before calling this.
248     void addBarrierChain(Value2SUsMap &map);
249 
250     /// Inserts a barrier chain in a huge region, far below current SU.
251     /// Adds barrier chain edges from all SUs in map with higher NodeNums than
252     /// this new BarrierChain, and remove them from map. It is assumed
253     /// BarrierChain has been set before calling this.
254     void insertBarrierChain(Value2SUsMap &map);
255 
256     /// For an unanalyzable memory access, this Value is used in maps.
257     UndefValue *UnknownValue;
258 
259 
260     /// Topo - A topological ordering for SUnits which permits fast IsReachable
261     /// and similar queries.
262     ScheduleDAGTopologicalSort Topo;
263 
264     using DbgValueVector =
265         std::vector<std::pair<MachineInstr *, MachineInstr *>>;
266     /// Remember instruction that precedes DBG_VALUE.
267     /// These are generated by buildSchedGraph but persist so they can be
268     /// referenced when emitting the final schedule.
269     DbgValueVector DbgValues;
270     MachineInstr *FirstDbgValue = nullptr;
271 
272     /// Set of live physical registers for updating kill flags.
273     LiveRegUnits LiveRegs;
274 
275   public:
276     explicit ScheduleDAGInstrs(MachineFunction &mf,
277                                const MachineLoopInfo *mli,
278                                bool RemoveKillFlags = false);
279 
280     ~ScheduleDAGInstrs() override = default;
281 
282     /// Gets the machine model for instruction scheduling.
getSchedModel()283     const TargetSchedModel *getSchedModel() const { return &SchedModel; }
284 
285     /// Resolves and cache a resolved scheduling class for an SUnit.
getSchedClass(SUnit * SU)286     const MCSchedClassDesc *getSchedClass(SUnit *SU) const {
287       if (!SU->SchedClass && SchedModel.hasInstrSchedModel())
288         SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr());
289       return SU->SchedClass;
290     }
291 
292     /// IsReachable - Checks if SU is reachable from TargetSU.
IsReachable(SUnit * SU,SUnit * TargetSU)293     bool IsReachable(SUnit *SU, SUnit *TargetSU) {
294       return Topo.IsReachable(SU, TargetSU);
295     }
296 
297     /// Whether regions with a single MI should be scheduled.
shouldScheduleSingleMIRegions()298     bool shouldScheduleSingleMIRegions() const {
299       return ScheduleSingleMIRegions;
300     }
301 
302     /// Returns an iterator to the top of the current scheduling region.
begin()303     MachineBasicBlock::iterator begin() const { return RegionBegin; }
304 
305     /// Returns an iterator to the bottom of the current scheduling region.
end()306     MachineBasicBlock::iterator end() const { return RegionEnd; }
307 
308     /// Creates a new SUnit and return a ptr to it.
309     SUnit *newSUnit(MachineInstr *MI);
310 
311     /// Returns an existing SUnit for this MI, or nullptr.
312     SUnit *getSUnit(MachineInstr *MI) const;
313 
314     /// If this method returns true, handling of the scheduling regions
315     /// themselves (in case of a scheduling boundary in MBB) will be done
316     /// beginning with the topmost region of MBB.
doMBBSchedRegionsTopDown()317     virtual bool doMBBSchedRegionsTopDown() const { return false; }
318 
319     /// Prepares to perform scheduling in the given block.
320     virtual void startBlock(MachineBasicBlock *BB);
321 
322     /// Cleans up after scheduling in the given block.
323     virtual void finishBlock();
324 
325     /// Initialize the DAG and common scheduler state for a new
326     /// scheduling region. This does not actually create the DAG, only clears
327     /// it. The scheduling driver may call BuildSchedGraph multiple times per
328     /// scheduling region.
329     virtual void enterRegion(MachineBasicBlock *bb,
330                              MachineBasicBlock::iterator begin,
331                              MachineBasicBlock::iterator end,
332                              unsigned regioninstrs);
333 
334     /// Called when the scheduler has finished scheduling the current region.
335     virtual void exitRegion();
336 
337     /// Builds SUnits for the current region.
338     /// If \p RPTracker is non-null, compute register pressure as a side effect.
339     /// The DAG builder is an efficient place to do it because it already visits
340     /// operands.
341     void buildSchedGraph(AAResults *AA,
342                          RegPressureTracker *RPTracker = nullptr,
343                          PressureDiffs *PDiffs = nullptr,
344                          LiveIntervals *LIS = nullptr,
345                          bool TrackLaneMasks = false);
346 
347     /// Adds dependencies from instructions in the current list of
348     /// instructions being scheduled to scheduling barrier. We want to make sure
349     /// instructions which define registers that are either used by the
350     /// terminator or are live-out are properly scheduled. This is especially
351     /// important when the definition latency of the return value(s) are too
352     /// high to be hidden by the branch or when the liveout registers used by
353     /// instructions in the fallthrough block.
354     void addSchedBarrierDeps();
355 
356     /// Orders nodes according to selected style.
357     ///
358     /// Typically, a scheduling algorithm will implement schedule() without
359     /// overriding enterRegion() or exitRegion().
360     virtual void schedule() = 0;
361 
362     /// Allow targets to perform final scheduling actions at the level of the
363     /// whole MachineFunction. By default does nothing.
finalizeSchedule()364     virtual void finalizeSchedule() {}
365 
366     void dumpNode(const SUnit &SU) const override;
367     void dump() const override;
368 
369     /// Returns a label for a DAG node that points to an instruction.
370     std::string getGraphNodeLabel(const SUnit *SU) const override;
371 
372     /// Returns a label for the region of code covered by the DAG.
373     std::string getDAGName() const override;
374 
375     /// Fixes register kill flags that scheduling has made invalid.
376     void fixupKills(MachineBasicBlock &MBB);
377 
378     /// True if an edge can be added from PredSU to SuccSU without creating
379     /// a cycle.
380     bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
381 
382     /// Add a DAG edge to the given SU with the given predecessor
383     /// dependence data.
384     ///
385     /// \returns true if the edge may be added without creating a cycle OR if an
386     /// equivalent edge already existed (false indicates failure).
387     bool addEdge(SUnit *SuccSU, const SDep &PredDep);
388 
389     /// Returns the array of the clusters.
getClusters()390     SmallVector<ClusterInfo> &getClusters() { return Clusters; }
391 
392     /// Get the specific cluster, return nullptr for InvalidClusterId.
getCluster(unsigned Idx)393     ClusterInfo *getCluster(unsigned Idx) {
394       return Idx != InvalidClusterId ? &Clusters[Idx] : nullptr;
395     }
396 
397   protected:
398     void initSUnits();
399     void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
400     void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
401     void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
402     void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
403 
404     /// Returns a mask for which lanes get read/written by the given (register)
405     /// machine operand.
406     LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
407 
408     /// Returns true if the def register in \p MO has no uses.
409     bool deadDefHasNoUse(const MachineOperand &MO);
410   };
411 
412   /// Creates a new SUnit and return a ptr to it.
newSUnit(MachineInstr * MI)413   inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) {
414 #ifndef NDEBUG
415     const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0];
416 #endif
417     SUnits.emplace_back(MI, (unsigned)SUnits.size());
418     assert((Addr == nullptr || Addr == &SUnits[0]) &&
419            "SUnits std::vector reallocated on the fly!");
420     return &SUnits.back();
421   }
422 
423   /// Returns an existing SUnit for this MI, or nullptr.
getSUnit(MachineInstr * MI)424   inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const {
425     return MISUnitMap.lookup(MI);
426   }
427 
428 } // end namespace llvm
429 
430 #endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
431