1 //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file Implements the ScheduleDAGInstrs class, which implements scheduling 10 /// for a MachineInstr-based dependency graph. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 15 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 16 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/PointerIntPair.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/SparseMultiSet.h" 21 #include "llvm/ADT/identity.h" 22 #include "llvm/CodeGen/LiveRegUnits.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/ScheduleDAG.h" 25 #include "llvm/CodeGen/TargetRegisterInfo.h" 26 #include "llvm/CodeGen/TargetSchedule.h" 27 #include "llvm/MC/LaneBitmask.h" 28 #include <cassert> 29 #include <cstdint> 30 #include <list> 31 #include <string> 32 #include <utility> 33 #include <vector> 34 35 namespace llvm { 36 37 class AAResults; 38 class LiveIntervals; 39 class MachineFrameInfo; 40 class MachineFunction; 41 class MachineInstr; 42 class MachineLoopInfo; 43 class MachineOperand; 44 struct MCSchedClassDesc; 45 class PressureDiffs; 46 class PseudoSourceValue; 47 class RegPressureTracker; 48 class UndefValue; 49 class Value; 50 51 /// An individual mapping from virtual register number to SUnit. 52 struct VReg2SUnit { 53 unsigned VirtReg; 54 LaneBitmask LaneMask; 55 SUnit *SU; 56 VReg2SUnitVReg2SUnit57 VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU) 58 : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} 59 getSparseSetIndexVReg2SUnit60 unsigned getSparseSetIndex() const { 61 return Register::virtReg2Index(VirtReg); 62 } 63 }; 64 65 /// Mapping from virtual register to SUnit including an operand index. 66 struct VReg2SUnitOperIdx : public VReg2SUnit { 67 unsigned OperandIndex; 68 VReg2SUnitOperIdxVReg2SUnitOperIdx69 VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask, 70 unsigned OperandIndex, SUnit *SU) 71 : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {} 72 }; 73 74 /// Record a physical register access. 75 /// For non-data-dependent uses, OpIdx == -1. 76 struct PhysRegSUOper { 77 SUnit *SU; 78 int OpIdx; 79 unsigned RegUnit; 80 PhysRegSUOperPhysRegSUOper81 PhysRegSUOper(SUnit *su, int op, unsigned R) 82 : SU(su), OpIdx(op), RegUnit(R) {} 83 getSparseSetIndexPhysRegSUOper84 unsigned getSparseSetIndex() const { return RegUnit; } 85 }; 86 87 /// Use a SparseMultiSet to track physical registers. Storage is only 88 /// allocated once for the pass. It can be cleared in constant time and reused 89 /// without any frees. 90 using RegUnit2SUnitsMap = 91 SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>; 92 93 /// Track local uses of virtual registers. These uses are gathered by the DAG 94 /// builder and may be consulted by the scheduler to avoid iterating an entire 95 /// vreg use list. 96 using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>; 97 98 using VReg2SUnitOperIdxMultiMap = 99 SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>; 100 101 using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>; 102 103 struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> { UnderlyingObjectUnderlyingObject104 UnderlyingObject(ValueType V, bool MayAlias) 105 : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {} 106 getValueUnderlyingObject107 ValueType getValue() const { return getPointer(); } mayAliasUnderlyingObject108 bool mayAlias() const { return getInt(); } 109 }; 110 111 using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>; 112 113 /// A ScheduleDAG for scheduling lists of MachineInstr. 114 class ScheduleDAGInstrs : public ScheduleDAG { 115 protected: 116 const MachineLoopInfo *MLI = nullptr; 117 const MachineFrameInfo &MFI; 118 119 /// TargetSchedModel provides an interface to the machine model. 120 TargetSchedModel SchedModel; 121 122 /// True if the DAG builder should remove kill flags (in preparation for 123 /// rescheduling). 124 bool RemoveKillFlags; 125 126 /// The standard DAG builder does not normally include terminators as DAG 127 /// nodes because it does not create the necessary dependencies to prevent 128 /// reordering. A specialized scheduler can override 129 /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate 130 /// it has taken responsibility for scheduling the terminator correctly. 131 bool CanHandleTerminators = false; 132 133 /// Whether lane masks should get tracked. 134 bool TrackLaneMasks = false; 135 136 // State specific to the current scheduling region. 137 // ------------------------------------------------ 138 139 /// The block in which to insert instructions 140 MachineBasicBlock *BB = nullptr; 141 142 /// The beginning of the range to be scheduled. 143 MachineBasicBlock::iterator RegionBegin; 144 145 /// The end of the range to be scheduled. 146 MachineBasicBlock::iterator RegionEnd; 147 148 /// Instructions in this region (distance(RegionBegin, RegionEnd)). 149 unsigned NumRegionInstrs = 0; 150 151 /// After calling BuildSchedGraph, each machine instruction in the current 152 /// scheduling region is mapped to an SUnit. 153 DenseMap<MachineInstr*, SUnit*> MISUnitMap; 154 155 // State internal to DAG building. 156 // ------------------------------- 157 158 /// Defs, Uses - Remember where defs and uses of each register are as we 159 /// iterate upward through the instructions. This is allocated here instead 160 /// of inside BuildSchedGraph to avoid the need for it to be initialized and 161 /// destructed for each block. 162 RegUnit2SUnitsMap Defs; 163 RegUnit2SUnitsMap Uses; 164 165 /// Tracks the last instruction(s) in this region defining each virtual 166 /// register. There may be multiple current definitions for a register with 167 /// disjunct lanemasks. 168 VReg2SUnitMultiMap CurrentVRegDefs; 169 /// Tracks the last instructions in this region using each virtual register. 170 VReg2SUnitOperIdxMultiMap CurrentVRegUses; 171 172 AAResults *AAForDep = nullptr; 173 174 /// Remember a generic side-effecting instruction as we proceed. 175 /// No other SU ever gets scheduled around it (except in the special 176 /// case of a huge region that gets reduced). 177 SUnit *BarrierChain = nullptr; 178 179 public: 180 /// A list of SUnits, used in Value2SUsMap, during DAG construction. 181 /// Note: to gain speed it might be worth investigating an optimized 182 /// implementation of this data structure, such as a singly linked list 183 /// with a memory pool (SmallVector was tried but slow and SparseSet is not 184 /// applicable). 185 using SUList = std::list<SUnit *>; 186 187 /// The direction that should be used to dump the scheduled Sequence. 188 enum DumpDirection { 189 TopDown, 190 BottomUp, 191 Bidirectional, 192 NotSet, 193 }; 194 setDumpDirection(DumpDirection D)195 void setDumpDirection(DumpDirection D) { DumpDir = D; } 196 197 protected: 198 DumpDirection DumpDir = NotSet; 199 200 /// A map from ValueType to SUList, used during DAG construction, as 201 /// a means of remembering which SUs depend on which memory locations. 202 class Value2SUsMap; 203 204 /// Reduces maps in FIFO order, by N SUs. This is better than turning 205 /// every Nth memory SU into BarrierChain in buildSchedGraph(), since 206 /// it avoids unnecessary edges between seen SUs above the new BarrierChain, 207 /// and those below it. 208 void reduceHugeMemNodeMaps(Value2SUsMap &stores, 209 Value2SUsMap &loads, unsigned N); 210 211 /// Adds a chain edge between SUa and SUb, but only if both 212 /// AAResults and Target fail to deny the dependency. 213 void addChainDependency(SUnit *SUa, SUnit *SUb, 214 unsigned Latency = 0); 215 216 /// Adds dependencies as needed from all SUs in list to SU. addChainDependencies(SUnit * SU,SUList & SUs,unsigned Latency)217 void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) { 218 for (SUnit *Entry : SUs) 219 addChainDependency(SU, Entry, Latency); 220 } 221 222 /// Adds dependencies as needed from all SUs in map, to SU. 223 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap); 224 225 /// Adds dependencies as needed to SU, from all SUs mapped to V. 226 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap, 227 ValueType V); 228 229 /// Adds barrier chain edges from all SUs in map, and then clear the map. 230 /// This is equivalent to insertBarrierChain(), but optimized for the common 231 /// case where the new BarrierChain (a global memory object) has a higher 232 /// NodeNum than all SUs in map. It is assumed BarrierChain has been set 233 /// before calling this. 234 void addBarrierChain(Value2SUsMap &map); 235 236 /// Inserts a barrier chain in a huge region, far below current SU. 237 /// Adds barrier chain edges from all SUs in map with higher NodeNums than 238 /// this new BarrierChain, and remove them from map. It is assumed 239 /// BarrierChain has been set before calling this. 240 void insertBarrierChain(Value2SUsMap &map); 241 242 /// For an unanalyzable memory access, this Value is used in maps. 243 UndefValue *UnknownValue; 244 245 246 /// Topo - A topological ordering for SUnits which permits fast IsReachable 247 /// and similar queries. 248 ScheduleDAGTopologicalSort Topo; 249 250 using DbgValueVector = 251 std::vector<std::pair<MachineInstr *, MachineInstr *>>; 252 /// Remember instruction that precedes DBG_VALUE. 253 /// These are generated by buildSchedGraph but persist so they can be 254 /// referenced when emitting the final schedule. 255 DbgValueVector DbgValues; 256 MachineInstr *FirstDbgValue = nullptr; 257 258 /// Set of live physical registers for updating kill flags. 259 LiveRegUnits LiveRegs; 260 261 public: 262 explicit ScheduleDAGInstrs(MachineFunction &mf, 263 const MachineLoopInfo *mli, 264 bool RemoveKillFlags = false); 265 266 ~ScheduleDAGInstrs() override = default; 267 268 /// Gets the machine model for instruction scheduling. getSchedModel()269 const TargetSchedModel *getSchedModel() const { return &SchedModel; } 270 271 /// Resolves and cache a resolved scheduling class for an SUnit. getSchedClass(SUnit * SU)272 const MCSchedClassDesc *getSchedClass(SUnit *SU) const { 273 if (!SU->SchedClass && SchedModel.hasInstrSchedModel()) 274 SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr()); 275 return SU->SchedClass; 276 } 277 278 /// IsReachable - Checks if SU is reachable from TargetSU. IsReachable(SUnit * SU,SUnit * TargetSU)279 bool IsReachable(SUnit *SU, SUnit *TargetSU) { 280 return Topo.IsReachable(SU, TargetSU); 281 } 282 283 /// Returns an iterator to the top of the current scheduling region. begin()284 MachineBasicBlock::iterator begin() const { return RegionBegin; } 285 286 /// Returns an iterator to the bottom of the current scheduling region. end()287 MachineBasicBlock::iterator end() const { return RegionEnd; } 288 289 /// Creates a new SUnit and return a ptr to it. 290 SUnit *newSUnit(MachineInstr *MI); 291 292 /// Returns an existing SUnit for this MI, or nullptr. 293 SUnit *getSUnit(MachineInstr *MI) const; 294 295 /// If this method returns true, handling of the scheduling regions 296 /// themselves (in case of a scheduling boundary in MBB) will be done 297 /// beginning with the topmost region of MBB. doMBBSchedRegionsTopDown()298 virtual bool doMBBSchedRegionsTopDown() const { return false; } 299 300 /// Prepares to perform scheduling in the given block. 301 virtual void startBlock(MachineBasicBlock *BB); 302 303 /// Cleans up after scheduling in the given block. 304 virtual void finishBlock(); 305 306 /// Initialize the DAG and common scheduler state for a new 307 /// scheduling region. This does not actually create the DAG, only clears 308 /// it. The scheduling driver may call BuildSchedGraph multiple times per 309 /// scheduling region. 310 virtual void enterRegion(MachineBasicBlock *bb, 311 MachineBasicBlock::iterator begin, 312 MachineBasicBlock::iterator end, 313 unsigned regioninstrs); 314 315 /// Called when the scheduler has finished scheduling the current region. 316 virtual void exitRegion(); 317 318 /// Builds SUnits for the current region. 319 /// If \p RPTracker is non-null, compute register pressure as a side effect. 320 /// The DAG builder is an efficient place to do it because it already visits 321 /// operands. 322 void buildSchedGraph(AAResults *AA, 323 RegPressureTracker *RPTracker = nullptr, 324 PressureDiffs *PDiffs = nullptr, 325 LiveIntervals *LIS = nullptr, 326 bool TrackLaneMasks = false); 327 328 /// Adds dependencies from instructions in the current list of 329 /// instructions being scheduled to scheduling barrier. We want to make sure 330 /// instructions which define registers that are either used by the 331 /// terminator or are live-out are properly scheduled. This is especially 332 /// important when the definition latency of the return value(s) are too 333 /// high to be hidden by the branch or when the liveout registers used by 334 /// instructions in the fallthrough block. 335 void addSchedBarrierDeps(); 336 337 /// Orders nodes according to selected style. 338 /// 339 /// Typically, a scheduling algorithm will implement schedule() without 340 /// overriding enterRegion() or exitRegion(). 341 virtual void schedule() = 0; 342 343 /// Allow targets to perform final scheduling actions at the level of the 344 /// whole MachineFunction. By default does nothing. finalizeSchedule()345 virtual void finalizeSchedule() {} 346 347 void dumpNode(const SUnit &SU) const override; 348 void dump() const override; 349 350 /// Returns a label for a DAG node that points to an instruction. 351 std::string getGraphNodeLabel(const SUnit *SU) const override; 352 353 /// Returns a label for the region of code covered by the DAG. 354 std::string getDAGName() const override; 355 356 /// Fixes register kill flags that scheduling has made invalid. 357 void fixupKills(MachineBasicBlock &MBB); 358 359 /// True if an edge can be added from PredSU to SuccSU without creating 360 /// a cycle. 361 bool canAddEdge(SUnit *SuccSU, SUnit *PredSU); 362 363 /// Add a DAG edge to the given SU with the given predecessor 364 /// dependence data. 365 /// 366 /// \returns true if the edge may be added without creating a cycle OR if an 367 /// equivalent edge already existed (false indicates failure). 368 bool addEdge(SUnit *SuccSU, const SDep &PredDep); 369 370 protected: 371 void initSUnits(); 372 void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx); 373 void addPhysRegDeps(SUnit *SU, unsigned OperIdx); 374 void addVRegDefDeps(SUnit *SU, unsigned OperIdx); 375 void addVRegUseDeps(SUnit *SU, unsigned OperIdx); 376 377 /// Returns a mask for which lanes get read/written by the given (register) 378 /// machine operand. 379 LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; 380 381 /// Returns true if the def register in \p MO has no uses. 382 bool deadDefHasNoUse(const MachineOperand &MO); 383 }; 384 385 /// Creates a new SUnit and return a ptr to it. newSUnit(MachineInstr * MI)386 inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) { 387 #ifndef NDEBUG 388 const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0]; 389 #endif 390 SUnits.emplace_back(MI, (unsigned)SUnits.size()); 391 assert((Addr == nullptr || Addr == &SUnits[0]) && 392 "SUnits std::vector reallocated on the fly!"); 393 return &SUnits.back(); 394 } 395 396 /// Returns an existing SUnit for this MI, or nullptr. getSUnit(MachineInstr * MI)397 inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const { 398 return MISUnitMap.lookup(MI); 399 } 400 401 } // end namespace llvm 402 403 #endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 404