1 //===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner. 10 // 11 // Software pipelining (SWP) is an instruction scheduling technique for loops 12 // that overlap loop iterations and exploits ILP via a compiler transformation. 13 // 14 // Swing Modulo Scheduling is an implementation of software pipelining 15 // that generates schedules that are near optimal in terms of initiation 16 // interval, register requirements, and stage count. See the papers: 17 // 18 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa, 19 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996 20 // Conference on Parallel Architectures and Compilation Techiniques. 21 // 22 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J. 23 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE 24 // Transactions on Computers, Vol. 50, No. 3, 2001. 25 // 26 // "An Implementation of Swing Modulo Scheduling With Extensions for 27 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at 28 // Urbana-Champaign, 2005. 29 // 30 // 31 // The SMS algorithm consists of three main steps after computing the minimal 32 // initiation interval (MII). 33 // 1) Analyze the dependence graph and compute information about each 34 // instruction in the graph. 35 // 2) Order the nodes (instructions) by priority based upon the heuristics 36 // described in the algorithm. 37 // 3) Attempt to schedule the nodes in the specified order using the MII. 38 // 39 //===----------------------------------------------------------------------===// 40 #ifndef LLVM_CODEGEN_MACHINEPIPELINER_H 41 #define LLVM_CODEGEN_MACHINEPIPELINER_H 42 43 #include "llvm/ADT/SetVector.h" 44 #include "llvm/CodeGen/DFAPacketizer.h" 45 #include "llvm/CodeGen/MachineDominators.h" 46 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" 47 #include "llvm/CodeGen/MachineScheduler.h" 48 #include "llvm/CodeGen/RegisterClassInfo.h" 49 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 50 #include "llvm/CodeGen/ScheduleDAGMutation.h" 51 #include "llvm/CodeGen/TargetInstrInfo.h" 52 #include "llvm/CodeGen/WindowScheduler.h" 53 #include "llvm/InitializePasses.h" 54 55 #include <deque> 56 57 namespace llvm { 58 59 class AAResults; 60 class NodeSet; 61 class SMSchedule; 62 63 extern cl::opt<bool> SwpEnableCopyToPhi; 64 extern cl::opt<int> SwpForceIssueWidth; 65 66 /// The main class in the implementation of the target independent 67 /// software pipeliner pass. 68 class MachinePipeliner : public MachineFunctionPass { 69 public: 70 MachineFunction *MF = nullptr; 71 MachineOptimizationRemarkEmitter *ORE = nullptr; 72 const MachineLoopInfo *MLI = nullptr; 73 const MachineDominatorTree *MDT = nullptr; 74 const InstrItineraryData *InstrItins = nullptr; 75 const TargetInstrInfo *TII = nullptr; 76 RegisterClassInfo RegClassInfo; 77 bool disabledByPragma = false; 78 unsigned II_setByPragma = 0; 79 80 #ifndef NDEBUG 81 static int NumTries; 82 #endif 83 84 /// Cache the target analysis information about the loop. 85 struct LoopInfo { 86 MachineBasicBlock *TBB = nullptr; 87 MachineBasicBlock *FBB = nullptr; 88 SmallVector<MachineOperand, 4> BrCond; 89 MachineInstr *LoopInductionVar = nullptr; 90 MachineInstr *LoopCompare = nullptr; 91 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo = 92 nullptr; 93 }; 94 LoopInfo LI; 95 96 static char ID; 97 MachinePipeliner()98 MachinePipeliner() : MachineFunctionPass(ID) { 99 initializeMachinePipelinerPass(*PassRegistry::getPassRegistry()); 100 } 101 102 bool runOnMachineFunction(MachineFunction &MF) override; 103 104 void getAnalysisUsage(AnalysisUsage &AU) const override; 105 106 private: 107 void preprocessPhiNodes(MachineBasicBlock &B); 108 bool canPipelineLoop(MachineLoop &L); 109 bool scheduleLoop(MachineLoop &L); 110 bool swingModuloScheduler(MachineLoop &L); 111 void setPragmaPipelineOptions(MachineLoop &L); 112 bool runWindowScheduler(MachineLoop &L); 113 bool useSwingModuloScheduler(); 114 bool useWindowScheduler(bool Changed); 115 }; 116 117 /// This class builds the dependence graph for the instructions in a loop, 118 /// and attempts to schedule the instructions using the SMS algorithm. 119 class SwingSchedulerDAG : public ScheduleDAGInstrs { 120 MachinePipeliner &Pass; 121 /// The minimum initiation interval between iterations for this schedule. 122 unsigned MII = 0; 123 /// The maximum initiation interval between iterations for this schedule. 124 unsigned MAX_II = 0; 125 /// Set to true if a valid pipelined schedule is found for the loop. 126 bool Scheduled = false; 127 MachineLoop &Loop; 128 LiveIntervals &LIS; 129 const RegisterClassInfo &RegClassInfo; 130 unsigned II_setByPragma = 0; 131 TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr; 132 133 /// A toplogical ordering of the SUnits, which is needed for changing 134 /// dependences and iterating over the SUnits. 135 ScheduleDAGTopologicalSort Topo; 136 137 struct NodeInfo { 138 int ASAP = 0; 139 int ALAP = 0; 140 int ZeroLatencyDepth = 0; 141 int ZeroLatencyHeight = 0; 142 143 NodeInfo() = default; 144 }; 145 /// Computed properties for each node in the graph. 146 std::vector<NodeInfo> ScheduleInfo; 147 148 enum OrderKind { BottomUp = 0, TopDown = 1 }; 149 /// Computed node ordering for scheduling. 150 SetVector<SUnit *> NodeOrder; 151 152 using NodeSetType = SmallVector<NodeSet, 8>; 153 using ValueMapTy = DenseMap<unsigned, unsigned>; 154 using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>; 155 using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>; 156 157 /// Instructions to change when emitting the final schedule. 158 DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges; 159 160 /// We may create a new instruction, so remember it because it 161 /// must be deleted when the pass is finished. 162 DenseMap<MachineInstr*, MachineInstr *> NewMIs; 163 164 /// Ordered list of DAG postprocessing steps. 165 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; 166 167 /// Helper class to implement Johnson's circuit finding algorithm. 168 class Circuits { 169 std::vector<SUnit> &SUnits; 170 SetVector<SUnit *> Stack; 171 BitVector Blocked; 172 SmallVector<SmallPtrSet<SUnit *, 4>, 10> B; 173 SmallVector<SmallVector<int, 4>, 16> AdjK; 174 // Node to Index from ScheduleDAGTopologicalSort 175 std::vector<int> *Node2Idx; 176 unsigned NumPaths = 0u; 177 static unsigned MaxPaths; 178 179 public: Circuits(std::vector<SUnit> & SUs,ScheduleDAGTopologicalSort & Topo)180 Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo) 181 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) { 182 Node2Idx = new std::vector<int>(SUs.size()); 183 unsigned Idx = 0; 184 for (const auto &NodeNum : Topo) 185 Node2Idx->at(NodeNum) = Idx++; 186 } 187 Circuits &operator=(const Circuits &other) = delete; 188 Circuits(const Circuits &other) = delete; ~Circuits()189 ~Circuits() { delete Node2Idx; } 190 191 /// Reset the data structures used in the circuit algorithm. reset()192 void reset() { 193 Stack.clear(); 194 Blocked.reset(); 195 B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>()); 196 NumPaths = 0; 197 } 198 199 void createAdjacencyStructure(SwingSchedulerDAG *DAG); 200 bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false); 201 void unblock(int U); 202 }; 203 204 struct CopyToPhiMutation : public ScheduleDAGMutation { 205 void apply(ScheduleDAGInstrs *DAG) override; 206 }; 207 208 public: SwingSchedulerDAG(MachinePipeliner & P,MachineLoop & L,LiveIntervals & lis,const RegisterClassInfo & rci,unsigned II,TargetInstrInfo::PipelinerLoopInfo * PLI)209 SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, 210 const RegisterClassInfo &rci, unsigned II, 211 TargetInstrInfo::PipelinerLoopInfo *PLI) 212 : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis), 213 RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI), 214 Topo(SUnits, &ExitSU) { 215 P.MF->getSubtarget().getSMSMutations(Mutations); 216 if (SwpEnableCopyToPhi) 217 Mutations.push_back(std::make_unique<CopyToPhiMutation>()); 218 } 219 220 void schedule() override; 221 void finishBlock() override; 222 223 /// Return true if the loop kernel has been scheduled. hasNewSchedule()224 bool hasNewSchedule() { return Scheduled; } 225 226 /// Return the earliest time an instruction may be scheduled. getASAP(SUnit * Node)227 int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; } 228 229 /// Return the latest time an instruction my be scheduled. getALAP(SUnit * Node)230 int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; } 231 232 /// The mobility function, which the number of slots in which 233 /// an instruction may be scheduled. getMOV(SUnit * Node)234 int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); } 235 236 /// The depth, in the dependence graph, for a node. getDepth(SUnit * Node)237 unsigned getDepth(SUnit *Node) { return Node->getDepth(); } 238 239 /// The maximum unweighted length of a path from an arbitrary node to the 240 /// given node in which each edge has latency 0 getZeroLatencyDepth(SUnit * Node)241 int getZeroLatencyDepth(SUnit *Node) { 242 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth; 243 } 244 245 /// The height, in the dependence graph, for a node. getHeight(SUnit * Node)246 unsigned getHeight(SUnit *Node) { return Node->getHeight(); } 247 248 /// The maximum unweighted length of a path from the given node to an 249 /// arbitrary node in which each edge has latency 0 getZeroLatencyHeight(SUnit * Node)250 int getZeroLatencyHeight(SUnit *Node) { 251 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight; 252 } 253 254 /// Return true if the dependence is a back-edge in the data dependence graph. 255 /// Since the DAG doesn't contain cycles, we represent a cycle in the graph 256 /// using an anti dependence from a Phi to an instruction. isBackedge(SUnit * Source,const SDep & Dep)257 bool isBackedge(SUnit *Source, const SDep &Dep) { 258 if (Dep.getKind() != SDep::Anti) 259 return false; 260 return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI(); 261 } 262 263 bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true); 264 265 /// The distance function, which indicates that operation V of iteration I 266 /// depends on operations U of iteration I-distance. getDistance(SUnit * U,SUnit * V,const SDep & Dep)267 unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) { 268 // Instructions that feed a Phi have a distance of 1. Computing larger 269 // values for arrays requires data dependence information. 270 if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti) 271 return 1; 272 return 0; 273 } 274 275 void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule); 276 277 void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs); 278 279 /// Return the new base register that was stored away for the changed 280 /// instruction. getInstrBaseReg(SUnit * SU)281 unsigned getInstrBaseReg(SUnit *SU) const { 282 DenseMap<SUnit *, std::pair<unsigned, int64_t>>::const_iterator It = 283 InstrChanges.find(SU); 284 if (It != InstrChanges.end()) 285 return It->second.first; 286 return 0; 287 } 288 addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation)289 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { 290 Mutations.push_back(std::move(Mutation)); 291 } 292 classof(const ScheduleDAGInstrs * DAG)293 static bool classof(const ScheduleDAGInstrs *DAG) { return true; } 294 295 private: 296 void addLoopCarriedDependences(AAResults *AA); 297 void updatePhiDependences(); 298 void changeDependences(); 299 unsigned calculateResMII(); 300 unsigned calculateRecMII(NodeSetType &RecNodeSets); 301 void findCircuits(NodeSetType &NodeSets); 302 void fuseRecs(NodeSetType &NodeSets); 303 void removeDuplicateNodes(NodeSetType &NodeSets); 304 void computeNodeFunctions(NodeSetType &NodeSets); 305 void registerPressureFilter(NodeSetType &NodeSets); 306 void colocateNodeSets(NodeSetType &NodeSets); 307 void checkNodeSets(NodeSetType &NodeSets); 308 void groupRemainingNodes(NodeSetType &NodeSets); 309 void addConnectedNodes(SUnit *SU, NodeSet &NewSet, 310 SetVector<SUnit *> &NodesAdded); 311 void computeNodeOrder(NodeSetType &NodeSets); 312 void checkValidNodeOrder(const NodeSetType &Circuits) const; 313 bool schedulePipeline(SMSchedule &Schedule); 314 bool computeDelta(MachineInstr &MI, unsigned &Delta); 315 MachineInstr *findDefInLoop(Register Reg); 316 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, 317 unsigned &OffsetPos, unsigned &NewBase, 318 int64_t &NewOffset); 319 void postProcessDAG(); 320 /// Set the Minimum Initiation Interval for this schedule attempt. 321 void setMII(unsigned ResMII, unsigned RecMII); 322 /// Set the Maximum Initiation Interval for this schedule attempt. 323 void setMAX_II(); 324 }; 325 326 /// A NodeSet contains a set of SUnit DAG nodes with additional information 327 /// that assigns a priority to the set. 328 class NodeSet { 329 SetVector<SUnit *> Nodes; 330 bool HasRecurrence = false; 331 unsigned RecMII = 0; 332 int MaxMOV = 0; 333 unsigned MaxDepth = 0; 334 unsigned Colocate = 0; 335 SUnit *ExceedPressure = nullptr; 336 unsigned Latency = 0; 337 338 public: 339 using iterator = SetVector<SUnit *>::const_iterator; 340 341 NodeSet() = default; NodeSet(iterator S,iterator E)342 NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) { 343 Latency = 0; 344 for (const SUnit *Node : Nodes) { 345 DenseMap<SUnit *, unsigned> SuccSUnitLatency; 346 for (const SDep &Succ : Node->Succs) { 347 auto SuccSUnit = Succ.getSUnit(); 348 if (!Nodes.count(SuccSUnit)) 349 continue; 350 unsigned CurLatency = Succ.getLatency(); 351 unsigned MaxLatency = 0; 352 if (SuccSUnitLatency.count(SuccSUnit)) 353 MaxLatency = SuccSUnitLatency[SuccSUnit]; 354 if (CurLatency > MaxLatency) 355 SuccSUnitLatency[SuccSUnit] = CurLatency; 356 } 357 for (auto SUnitLatency : SuccSUnitLatency) 358 Latency += SUnitLatency.second; 359 } 360 } 361 insert(SUnit * SU)362 bool insert(SUnit *SU) { return Nodes.insert(SU); } 363 insert(iterator S,iterator E)364 void insert(iterator S, iterator E) { Nodes.insert(S, E); } 365 remove_if(UnaryPredicate P)366 template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) { 367 return Nodes.remove_if(P); 368 } 369 count(SUnit * SU)370 unsigned count(SUnit *SU) const { return Nodes.count(SU); } 371 hasRecurrence()372 bool hasRecurrence() { return HasRecurrence; }; 373 size()374 unsigned size() const { return Nodes.size(); } 375 empty()376 bool empty() const { return Nodes.empty(); } 377 getNode(unsigned i)378 SUnit *getNode(unsigned i) const { return Nodes[i]; }; 379 setRecMII(unsigned mii)380 void setRecMII(unsigned mii) { RecMII = mii; }; 381 setColocate(unsigned c)382 void setColocate(unsigned c) { Colocate = c; }; 383 setExceedPressure(SUnit * SU)384 void setExceedPressure(SUnit *SU) { ExceedPressure = SU; } 385 isExceedSU(SUnit * SU)386 bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; } 387 compareRecMII(NodeSet & RHS)388 int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; } 389 getRecMII()390 int getRecMII() { return RecMII; } 391 392 /// Summarize node functions for the entire node set. computeNodeSetInfo(SwingSchedulerDAG * SSD)393 void computeNodeSetInfo(SwingSchedulerDAG *SSD) { 394 for (SUnit *SU : *this) { 395 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU)); 396 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU)); 397 } 398 } 399 getLatency()400 unsigned getLatency() { return Latency; } 401 getMaxDepth()402 unsigned getMaxDepth() { return MaxDepth; } 403 clear()404 void clear() { 405 Nodes.clear(); 406 RecMII = 0; 407 HasRecurrence = false; 408 MaxMOV = 0; 409 MaxDepth = 0; 410 Colocate = 0; 411 ExceedPressure = nullptr; 412 } 413 414 operator SetVector<SUnit *> &() { return Nodes; } 415 416 /// Sort the node sets by importance. First, rank them by recurrence MII, 417 /// then by mobility (least mobile done first), and finally by depth. 418 /// Each node set may contain a colocate value which is used as the first 419 /// tie breaker, if it's set. 420 bool operator>(const NodeSet &RHS) const { 421 if (RecMII == RHS.RecMII) { 422 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate) 423 return Colocate < RHS.Colocate; 424 if (MaxMOV == RHS.MaxMOV) 425 return MaxDepth > RHS.MaxDepth; 426 return MaxMOV < RHS.MaxMOV; 427 } 428 return RecMII > RHS.RecMII; 429 } 430 431 bool operator==(const NodeSet &RHS) const { 432 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV && 433 MaxDepth == RHS.MaxDepth; 434 } 435 436 bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); } 437 begin()438 iterator begin() { return Nodes.begin(); } end()439 iterator end() { return Nodes.end(); } 440 void print(raw_ostream &os) const; 441 442 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 443 LLVM_DUMP_METHOD void dump() const; 444 #endif 445 }; 446 447 // 16 was selected based on the number of ProcResource kinds for all 448 // existing Subtargets, so that SmallVector don't need to resize too often. 449 static const int DefaultProcResSize = 16; 450 451 class ResourceManager { 452 private: 453 const MCSubtargetInfo *STI; 454 const MCSchedModel &SM; 455 const TargetSubtargetInfo *ST; 456 const TargetInstrInfo *TII; 457 ScheduleDAGInstrs *DAG; 458 const bool UseDFA; 459 /// DFA resources for each slot 460 llvm::SmallVector<std::unique_ptr<DFAPacketizer>> DFAResources; 461 /// Modulo Reservation Table. When a resource with ID R is consumed in cycle 462 /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F) 463 llvm::SmallVector<llvm::SmallVector<uint64_t, DefaultProcResSize>> MRT; 464 /// The number of scheduled micro operations for each slot. Micro operations 465 /// are assumed to be scheduled one per cycle, starting with the cycle in 466 /// which the instruction is scheduled. 467 llvm::SmallVector<int> NumScheduledMops; 468 /// Each processor resource is associated with a so-called processor resource 469 /// mask. This vector allows to correlate processor resource IDs with 470 /// processor resource masks. There is exactly one element per each processor 471 /// resource declared by the scheduling model. 472 llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks; 473 int InitiationInterval = 0; 474 /// The number of micro operations that can be scheduled at a cycle. 475 int IssueWidth; 476 477 int calculateResMIIDFA() const; 478 /// Check if MRT is overbooked 479 bool isOverbooked() const; 480 /// Reserve resources on MRT 481 void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle); 482 /// Unreserve resources on MRT 483 void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle); 484 485 /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor. 486 /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C, 487 /// II). positiveModulo(int Dividend,int Divisor)488 int positiveModulo(int Dividend, int Divisor) const { 489 assert(Divisor > 0); 490 int R = Dividend % Divisor; 491 if (R < 0) 492 R += Divisor; 493 return R; 494 } 495 496 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 497 LLVM_DUMP_METHOD void dumpMRT() const; 498 #endif 499 500 public: ResourceManager(const TargetSubtargetInfo * ST,ScheduleDAGInstrs * DAG)501 ResourceManager(const TargetSubtargetInfo *ST, ScheduleDAGInstrs *DAG) 502 : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()), 503 DAG(DAG), UseDFA(ST->useDFAforSMS()), 504 ProcResourceMasks(SM.getNumProcResourceKinds(), 0), 505 IssueWidth(SM.IssueWidth) { 506 initProcResourceVectors(SM, ProcResourceMasks); 507 if (IssueWidth <= 0) 508 // If IssueWidth is not specified, set a sufficiently large value 509 IssueWidth = 100; 510 if (SwpForceIssueWidth > 0) 511 IssueWidth = SwpForceIssueWidth; 512 } 513 514 void initProcResourceVectors(const MCSchedModel &SM, 515 SmallVectorImpl<uint64_t> &Masks); 516 517 /// Check if the resources occupied by a machine instruction are available 518 /// in the current state. 519 bool canReserveResources(SUnit &SU, int Cycle); 520 521 /// Reserve the resources occupied by a machine instruction and change the 522 /// current state to reflect that change. 523 void reserveResources(SUnit &SU, int Cycle); 524 525 int calculateResMII() const; 526 527 /// Initialize resources with the initiation interval II. 528 void init(int II); 529 }; 530 531 /// This class represents the scheduled code. The main data structure is a 532 /// map from scheduled cycle to instructions. During scheduling, the 533 /// data structure explicitly represents all stages/iterations. When 534 /// the algorithm finshes, the schedule is collapsed into a single stage, 535 /// which represents instructions from different loop iterations. 536 /// 537 /// The SMS algorithm allows negative values for cycles, so the first cycle 538 /// in the schedule is the smallest cycle value. 539 class SMSchedule { 540 private: 541 /// Map from execution cycle to instructions. 542 DenseMap<int, std::deque<SUnit *>> ScheduledInstrs; 543 544 /// Map from instruction to execution cycle. 545 std::map<SUnit *, int> InstrToCycle; 546 547 /// Keep track of the first cycle value in the schedule. It starts 548 /// as zero, but the algorithm allows negative values. 549 int FirstCycle = 0; 550 551 /// Keep track of the last cycle value in the schedule. 552 int LastCycle = 0; 553 554 /// The initiation interval (II) for the schedule. 555 int InitiationInterval = 0; 556 557 /// Target machine information. 558 const TargetSubtargetInfo &ST; 559 560 /// Virtual register information. 561 MachineRegisterInfo &MRI; 562 563 ResourceManager ProcItinResources; 564 565 public: SMSchedule(MachineFunction * mf,SwingSchedulerDAG * DAG)566 SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG) 567 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), 568 ProcItinResources(&ST, DAG) {} 569 reset()570 void reset() { 571 ScheduledInstrs.clear(); 572 InstrToCycle.clear(); 573 FirstCycle = 0; 574 LastCycle = 0; 575 InitiationInterval = 0; 576 } 577 578 /// Set the initiation interval for this schedule. setInitiationInterval(int ii)579 void setInitiationInterval(int ii) { 580 InitiationInterval = ii; 581 ProcItinResources.init(ii); 582 } 583 584 /// Return the initiation interval for this schedule. getInitiationInterval()585 int getInitiationInterval() const { return InitiationInterval; } 586 587 /// Return the first cycle in the completed schedule. This 588 /// can be a negative value. getFirstCycle()589 int getFirstCycle() const { return FirstCycle; } 590 591 /// Return the last cycle in the finalized schedule. getFinalCycle()592 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; } 593 594 /// Return the cycle of the earliest scheduled instruction in the dependence 595 /// chain. 596 int earliestCycleInChain(const SDep &Dep); 597 598 /// Return the cycle of the latest scheduled instruction in the dependence 599 /// chain. 600 int latestCycleInChain(const SDep &Dep); 601 602 void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, 603 SwingSchedulerDAG *DAG); 604 bool insert(SUnit *SU, int StartCycle, int EndCycle, int II); 605 606 /// Iterators for the cycle to instruction map. 607 using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator; 608 using const_sched_iterator = 609 DenseMap<int, std::deque<SUnit *>>::const_iterator; 610 611 /// Return true if the instruction is scheduled at the specified stage. isScheduledAtStage(SUnit * SU,unsigned StageNum)612 bool isScheduledAtStage(SUnit *SU, unsigned StageNum) { 613 return (stageScheduled(SU) == (int)StageNum); 614 } 615 616 /// Return the stage for a scheduled instruction. Return -1 if 617 /// the instruction has not been scheduled. stageScheduled(SUnit * SU)618 int stageScheduled(SUnit *SU) const { 619 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); 620 if (it == InstrToCycle.end()) 621 return -1; 622 return (it->second - FirstCycle) / InitiationInterval; 623 } 624 625 /// Return the cycle for a scheduled instruction. This function normalizes 626 /// the first cycle to be 0. cycleScheduled(SUnit * SU)627 unsigned cycleScheduled(SUnit *SU) const { 628 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); 629 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled."); 630 return (it->second - FirstCycle) % InitiationInterval; 631 } 632 633 /// Return the maximum stage count needed for this schedule. getMaxStageCount()634 unsigned getMaxStageCount() { 635 return (LastCycle - FirstCycle) / InitiationInterval; 636 } 637 638 /// Return the instructions that are scheduled at the specified cycle. getInstructions(int cycle)639 std::deque<SUnit *> &getInstructions(int cycle) { 640 return ScheduledInstrs[cycle]; 641 } 642 643 SmallSet<SUnit *, 8> 644 computeUnpipelineableNodes(SwingSchedulerDAG *SSD, 645 TargetInstrInfo::PipelinerLoopInfo *PLI); 646 647 std::deque<SUnit *> 648 reorderInstructions(const SwingSchedulerDAG *SSD, 649 const std::deque<SUnit *> &Instrs) const; 650 651 bool 652 normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, 653 TargetInstrInfo::PipelinerLoopInfo *PLI); 654 bool isValidSchedule(SwingSchedulerDAG *SSD); 655 void finalizeSchedule(SwingSchedulerDAG *SSD); 656 void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, 657 std::deque<SUnit *> &Insts) const; 658 bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const; 659 bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, 660 MachineOperand &MO) const; 661 662 bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, 663 SwingSchedulerDAG *DAG) const; 664 void print(raw_ostream &os) const; 665 void dump() const; 666 }; 667 668 } // end namespace llvm 669 670 #endif // LLVM_CODEGEN_MACHINEPIPELINER_H 671