1 //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file Implements the ScheduleDAGInstrs class, which implements scheduling 10 /// for a MachineInstr-based dependency graph. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 15 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 16 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/PointerIntPair.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/SparseMultiSet.h" 21 #include "llvm/ADT/identity.h" 22 #include "llvm/Analysis/AliasAnalysis.h" 23 #include "llvm/CodeGen/LiveRegUnits.h" 24 #include "llvm/CodeGen/MachineBasicBlock.h" 25 #include "llvm/CodeGen/ScheduleDAG.h" 26 #include "llvm/CodeGen/TargetRegisterInfo.h" 27 #include "llvm/CodeGen/TargetSchedule.h" 28 #include "llvm/MC/LaneBitmask.h" 29 #include "llvm/Support/Compiler.h" 30 #include <cassert> 31 #include <cstdint> 32 #include <list> 33 #include <string> 34 #include <utility> 35 #include <vector> 36 37 namespace llvm { 38 39 class AAResults; 40 class LiveIntervals; 41 class MachineFrameInfo; 42 class MachineFunction; 43 class MachineInstr; 44 class MachineLoopInfo; 45 class MachineOperand; 46 struct MCSchedClassDesc; 47 class PressureDiffs; 48 class PseudoSourceValue; 49 class RegPressureTracker; 50 class UndefValue; 51 class Value; 52 53 /// An individual mapping from virtual register number to SUnit. 54 struct VReg2SUnit { 55 Register VirtReg; 56 LaneBitmask LaneMask; 57 SUnit *SU; 58 VReg2SUnitVReg2SUnit59 VReg2SUnit(Register VReg, LaneBitmask LaneMask, SUnit *SU) 60 : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} 61 getSparseSetIndexVReg2SUnit62 unsigned getSparseSetIndex() const { 63 return VirtReg.virtRegIndex(); 64 } 65 }; 66 67 /// Mapping from virtual register to SUnit including an operand index. 68 struct VReg2SUnitOperIdx : public VReg2SUnit { 69 unsigned OperandIndex; 70 VReg2SUnitOperIdxVReg2SUnitOperIdx71 VReg2SUnitOperIdx(Register VReg, LaneBitmask LaneMask, 72 unsigned OperandIndex, SUnit *SU) 73 : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {} 74 }; 75 76 /// Record a physical register access. 77 /// For non-data-dependent uses, OpIdx == -1. 78 struct PhysRegSUOper { 79 SUnit *SU; 80 int OpIdx; 81 unsigned RegUnit; 82 PhysRegSUOperPhysRegSUOper83 PhysRegSUOper(SUnit *su, int op, unsigned R) 84 : SU(su), OpIdx(op), RegUnit(R) {} 85 getSparseSetIndexPhysRegSUOper86 unsigned getSparseSetIndex() const { return RegUnit; } 87 }; 88 89 /// Use a SparseMultiSet to track physical registers. Storage is only 90 /// allocated once for the pass. It can be cleared in constant time and reused 91 /// without any frees. 92 using RegUnit2SUnitsMap = 93 SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>; 94 95 /// Track local uses of virtual registers. These uses are gathered by the DAG 96 /// builder and may be consulted by the scheduler to avoid iterating an entire 97 /// vreg use list. 98 using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>; 99 100 using VReg2SUnitOperIdxMultiMap = 101 SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>; 102 103 using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>; 104 105 struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> { UnderlyingObjectUnderlyingObject106 UnderlyingObject(ValueType V, bool MayAlias) 107 : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {} 108 getValueUnderlyingObject109 ValueType getValue() const { return getPointer(); } mayAliasUnderlyingObject110 bool mayAlias() const { return getInt(); } 111 }; 112 113 using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>; 114 115 /// A ScheduleDAG for scheduling lists of MachineInstr. 116 class LLVM_ABI ScheduleDAGInstrs : public ScheduleDAG { 117 protected: 118 const MachineLoopInfo *MLI = nullptr; 119 const MachineFrameInfo &MFI; 120 121 /// TargetSchedModel provides an interface to the machine model. 122 TargetSchedModel SchedModel; 123 124 /// True if the DAG builder should remove kill flags (in preparation for 125 /// rescheduling). 126 bool RemoveKillFlags; 127 128 /// True if regions with a single MI should be scheduled. 129 bool ScheduleSingleMIRegions = false; 130 131 /// The standard DAG builder does not normally include terminators as DAG 132 /// nodes because it does not create the necessary dependencies to prevent 133 /// reordering. A specialized scheduler can override 134 /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate 135 /// it has taken responsibility for scheduling the terminator correctly. 136 bool CanHandleTerminators = false; 137 138 /// Whether lane masks should get tracked. 139 bool TrackLaneMasks = false; 140 141 // State specific to the current scheduling region. 142 // ------------------------------------------------ 143 144 /// The block in which to insert instructions 145 MachineBasicBlock *BB = nullptr; 146 147 /// The beginning of the range to be scheduled. 148 MachineBasicBlock::iterator RegionBegin; 149 150 /// The end of the range to be scheduled. 151 MachineBasicBlock::iterator RegionEnd; 152 153 /// Instructions in this region (distance(RegionBegin, RegionEnd)). 154 unsigned NumRegionInstrs = 0; 155 156 /// After calling BuildSchedGraph, each machine instruction in the current 157 /// scheduling region is mapped to an SUnit. 158 DenseMap<MachineInstr*, SUnit*> MISUnitMap; 159 160 // State internal to DAG building. 161 // ------------------------------- 162 163 /// Defs, Uses - Remember where defs and uses of each register are as we 164 /// iterate upward through the instructions. This is allocated here instead 165 /// of inside BuildSchedGraph to avoid the need for it to be initialized and 166 /// destructed for each block. 167 RegUnit2SUnitsMap Defs; 168 RegUnit2SUnitsMap Uses; 169 170 /// Tracks the last instruction(s) in this region defining each virtual 171 /// register. There may be multiple current definitions for a register with 172 /// disjunct lanemasks. 173 VReg2SUnitMultiMap CurrentVRegDefs; 174 /// Tracks the last instructions in this region using each virtual register. 175 VReg2SUnitOperIdxMultiMap CurrentVRegUses; 176 177 mutable std::optional<BatchAAResults> AAForDep; 178 179 /// Remember a generic side-effecting instruction as we proceed. 180 /// No other SU ever gets scheduled around it (except in the special 181 /// case of a huge region that gets reduced). 182 SUnit *BarrierChain = nullptr; 183 184 SmallVector<ClusterInfo> Clusters; 185 186 public: 187 /// A list of SUnits, used in Value2SUsMap, during DAG construction. 188 /// Note: to gain speed it might be worth investigating an optimized 189 /// implementation of this data structure, such as a singly linked list 190 /// with a memory pool (SmallVector was tried but slow and SparseSet is not 191 /// applicable). 192 using SUList = std::list<SUnit *>; 193 194 /// The direction that should be used to dump the scheduled Sequence. 195 enum DumpDirection { 196 TopDown, 197 BottomUp, 198 Bidirectional, 199 NotSet, 200 }; 201 setDumpDirection(DumpDirection D)202 void setDumpDirection(DumpDirection D) { DumpDir = D; } 203 204 protected: 205 DumpDirection DumpDir = NotSet; 206 207 /// A map from ValueType to SUList, used during DAG construction, as 208 /// a means of remembering which SUs depend on which memory locations. 209 class Value2SUsMap; 210 211 /// Returns a (possibly null) pointer to the current BatchAAResults. getAAForDep()212 BatchAAResults *getAAForDep() const { 213 if (AAForDep.has_value()) 214 return &AAForDep.value(); 215 return nullptr; 216 } 217 218 /// Reduces maps in FIFO order, by N SUs. This is better than turning 219 /// every Nth memory SU into BarrierChain in buildSchedGraph(), since 220 /// it avoids unnecessary edges between seen SUs above the new BarrierChain, 221 /// and those below it. 222 void reduceHugeMemNodeMaps(Value2SUsMap &stores, 223 Value2SUsMap &loads, unsigned N); 224 225 /// Adds a chain edge between SUa and SUb, but only if both 226 /// AAResults and Target fail to deny the dependency. 227 void addChainDependency(SUnit *SUa, SUnit *SUb, 228 unsigned Latency = 0); 229 230 /// Adds dependencies as needed from all SUs in list to SU. addChainDependencies(SUnit * SU,SUList & SUs,unsigned Latency)231 void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) { 232 for (SUnit *Entry : SUs) 233 addChainDependency(SU, Entry, Latency); 234 } 235 236 /// Adds dependencies as needed from all SUs in map, to SU. 237 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap); 238 239 /// Adds dependencies as needed to SU, from all SUs mapped to V. 240 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap, 241 ValueType V); 242 243 /// Adds barrier chain edges from all SUs in map, and then clear the map. 244 /// This is equivalent to insertBarrierChain(), but optimized for the common 245 /// case where the new BarrierChain (a global memory object) has a higher 246 /// NodeNum than all SUs in map. It is assumed BarrierChain has been set 247 /// before calling this. 248 void addBarrierChain(Value2SUsMap &map); 249 250 /// Inserts a barrier chain in a huge region, far below current SU. 251 /// Adds barrier chain edges from all SUs in map with higher NodeNums than 252 /// this new BarrierChain, and remove them from map. It is assumed 253 /// BarrierChain has been set before calling this. 254 void insertBarrierChain(Value2SUsMap &map); 255 256 /// For an unanalyzable memory access, this Value is used in maps. 257 UndefValue *UnknownValue; 258 259 260 /// Topo - A topological ordering for SUnits which permits fast IsReachable 261 /// and similar queries. 262 ScheduleDAGTopologicalSort Topo; 263 264 using DbgValueVector = 265 std::vector<std::pair<MachineInstr *, MachineInstr *>>; 266 /// Remember instruction that precedes DBG_VALUE. 267 /// These are generated by buildSchedGraph but persist so they can be 268 /// referenced when emitting the final schedule. 269 DbgValueVector DbgValues; 270 MachineInstr *FirstDbgValue = nullptr; 271 272 /// Set of live physical registers for updating kill flags. 273 LiveRegUnits LiveRegs; 274 275 public: 276 explicit ScheduleDAGInstrs(MachineFunction &mf, 277 const MachineLoopInfo *mli, 278 bool RemoveKillFlags = false); 279 280 ~ScheduleDAGInstrs() override = default; 281 282 /// Gets the machine model for instruction scheduling. getSchedModel()283 const TargetSchedModel *getSchedModel() const { return &SchedModel; } 284 285 /// Resolves and cache a resolved scheduling class for an SUnit. getSchedClass(SUnit * SU)286 const MCSchedClassDesc *getSchedClass(SUnit *SU) const { 287 if (!SU->SchedClass && SchedModel.hasInstrSchedModel()) 288 SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr()); 289 return SU->SchedClass; 290 } 291 292 /// IsReachable - Checks if SU is reachable from TargetSU. IsReachable(SUnit * SU,SUnit * TargetSU)293 bool IsReachable(SUnit *SU, SUnit *TargetSU) { 294 return Topo.IsReachable(SU, TargetSU); 295 } 296 297 /// Whether regions with a single MI should be scheduled. shouldScheduleSingleMIRegions()298 bool shouldScheduleSingleMIRegions() const { 299 return ScheduleSingleMIRegions; 300 } 301 302 /// Returns an iterator to the top of the current scheduling region. begin()303 MachineBasicBlock::iterator begin() const { return RegionBegin; } 304 305 /// Returns an iterator to the bottom of the current scheduling region. end()306 MachineBasicBlock::iterator end() const { return RegionEnd; } 307 308 /// Creates a new SUnit and return a ptr to it. 309 SUnit *newSUnit(MachineInstr *MI); 310 311 /// Returns an existing SUnit for this MI, or nullptr. 312 SUnit *getSUnit(MachineInstr *MI) const; 313 314 /// If this method returns true, handling of the scheduling regions 315 /// themselves (in case of a scheduling boundary in MBB) will be done 316 /// beginning with the topmost region of MBB. doMBBSchedRegionsTopDown()317 virtual bool doMBBSchedRegionsTopDown() const { return false; } 318 319 /// Prepares to perform scheduling in the given block. 320 virtual void startBlock(MachineBasicBlock *BB); 321 322 /// Cleans up after scheduling in the given block. 323 virtual void finishBlock(); 324 325 /// Initialize the DAG and common scheduler state for a new 326 /// scheduling region. This does not actually create the DAG, only clears 327 /// it. The scheduling driver may call BuildSchedGraph multiple times per 328 /// scheduling region. 329 virtual void enterRegion(MachineBasicBlock *bb, 330 MachineBasicBlock::iterator begin, 331 MachineBasicBlock::iterator end, 332 unsigned regioninstrs); 333 334 /// Called when the scheduler has finished scheduling the current region. 335 virtual void exitRegion(); 336 337 /// Builds SUnits for the current region. 338 /// If \p RPTracker is non-null, compute register pressure as a side effect. 339 /// The DAG builder is an efficient place to do it because it already visits 340 /// operands. 341 void buildSchedGraph(AAResults *AA, 342 RegPressureTracker *RPTracker = nullptr, 343 PressureDiffs *PDiffs = nullptr, 344 LiveIntervals *LIS = nullptr, 345 bool TrackLaneMasks = false); 346 347 /// Adds dependencies from instructions in the current list of 348 /// instructions being scheduled to scheduling barrier. We want to make sure 349 /// instructions which define registers that are either used by the 350 /// terminator or are live-out are properly scheduled. This is especially 351 /// important when the definition latency of the return value(s) are too 352 /// high to be hidden by the branch or when the liveout registers used by 353 /// instructions in the fallthrough block. 354 void addSchedBarrierDeps(); 355 356 /// Orders nodes according to selected style. 357 /// 358 /// Typically, a scheduling algorithm will implement schedule() without 359 /// overriding enterRegion() or exitRegion(). 360 virtual void schedule() = 0; 361 362 /// Allow targets to perform final scheduling actions at the level of the 363 /// whole MachineFunction. By default does nothing. finalizeSchedule()364 virtual void finalizeSchedule() {} 365 366 void dumpNode(const SUnit &SU) const override; 367 void dump() const override; 368 369 /// Returns a label for a DAG node that points to an instruction. 370 std::string getGraphNodeLabel(const SUnit *SU) const override; 371 372 /// Returns a label for the region of code covered by the DAG. 373 std::string getDAGName() const override; 374 375 /// Fixes register kill flags that scheduling has made invalid. 376 void fixupKills(MachineBasicBlock &MBB); 377 378 /// True if an edge can be added from PredSU to SuccSU without creating 379 /// a cycle. 380 bool canAddEdge(SUnit *SuccSU, SUnit *PredSU); 381 382 /// Add a DAG edge to the given SU with the given predecessor 383 /// dependence data. 384 /// 385 /// \returns true if the edge may be added without creating a cycle OR if an 386 /// equivalent edge already existed (false indicates failure). 387 bool addEdge(SUnit *SuccSU, const SDep &PredDep); 388 389 /// Returns the array of the clusters. getClusters()390 SmallVector<ClusterInfo> &getClusters() { return Clusters; } 391 392 /// Get the specific cluster, return nullptr for InvalidClusterId. getCluster(unsigned Idx)393 ClusterInfo *getCluster(unsigned Idx) { 394 return Idx != InvalidClusterId ? &Clusters[Idx] : nullptr; 395 } 396 397 protected: 398 void initSUnits(); 399 void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx); 400 void addPhysRegDeps(SUnit *SU, unsigned OperIdx); 401 void addVRegDefDeps(SUnit *SU, unsigned OperIdx); 402 void addVRegUseDeps(SUnit *SU, unsigned OperIdx); 403 404 /// Returns a mask for which lanes get read/written by the given (register) 405 /// machine operand. 406 LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; 407 408 /// Returns true if the def register in \p MO has no uses. 409 bool deadDefHasNoUse(const MachineOperand &MO); 410 }; 411 412 /// Creates a new SUnit and return a ptr to it. newSUnit(MachineInstr * MI)413 inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) { 414 #ifndef NDEBUG 415 const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0]; 416 #endif 417 SUnits.emplace_back(MI, (unsigned)SUnits.size()); 418 assert((Addr == nullptr || Addr == &SUnits[0]) && 419 "SUnits std::vector reallocated on the fly!"); 420 return &SUnits.back(); 421 } 422 423 /// Returns an existing SUnit for this MI, or nullptr. getSUnit(MachineInstr * MI)424 inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const { 425 return MISUnitMap.lookup(MI); 426 } 427 428 } // end namespace llvm 429 430 #endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 431