//===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides an interface for customizing the standard MachineScheduler
// pass. Note that the entire pass may be replaced as follows:
//
//   <Target>TargetMachine::createPassConfig(PassManagerBase &PM) {
//     PM.substitutePass(&MachineSchedulerID, &CustomSchedulerPassID);
//     ...
//   }
//
// The MachineScheduler pass is only responsible for choosing the regions to be
// scheduled. Targets can override the DAG builder and scheduler without
// replacing the pass as follows:
//
//   ScheduleDAGInstrs *<Target>PassConfig::
//   createMachineScheduler(MachineSchedContext *C) {
//     return new CustomMachineScheduler(C);
//   }
//
// The default scheduler, ScheduleDAGMILive, builds the DAG and drives list
// scheduling while updating the instruction stream, register pressure, and live
// intervals. Most targets don't need to override the DAG builder and list
// scheduler, but subtargets that require custom scheduling heuristics may
// plug in an alternate MachineSchedStrategy. The strategy is responsible for
// selecting the highest-priority node from the list:
//
//   ScheduleDAGInstrs *<Target>PassConfig::
//   createMachineScheduler(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, CustomStrategy(C));
//   }
//
// The DAG builder can also be customized by adding DAG mutations that run
// after DAG building and before list scheduling. DAG mutations can adjust
// dependencies based on target-specific knowledge or add weak edges to aid
// heuristics:
//
//   ScheduleDAGInstrs *<Target>PassConfig::
//   createMachineScheduler(MachineSchedContext *C) {
//     ScheduleDAGMI *DAG = createGenericSchedLive(C);
//     DAG->addMutation(new CustomDAGMutation(...));
//     return DAG;
//   }
//
// A target that supports alternative schedulers can use the
// MachineSchedRegistry to allow command line selection.
// This can be done by implementing the following boilerplate:
//
//   static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) {
//     return new CustomMachineScheduler(C);
//   }
//   static MachineSchedRegistry
//   SchedCustomRegistry("custom", "Run my target's custom scheduler",
//                       createCustomMachineSched);
//
// Finally, subtargets that don't need to implement custom heuristics but would
// like to configure the GenericScheduler's policy for a given scheduler region,
// including scheduling direction and register pressure tracking policy, can do
// this:
//
//   void <SubTarget>Subtarget::
//   overrideSchedPolicy(MachineSchedPolicy &Policy,
//                       unsigned NumRegionInstrs) const {
//     Policy.<Flag> = true;
//   }
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_MACHINESCHEDULER_H
#define LLVM_CODEGEN_MACHINESCHEDULER_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <list>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace llvm {

extern cl::opt<bool> ForceTopDown;
extern cl::opt<bool> ForceBottomUp;
extern cl::opt<bool> VerifyScheduling;
#ifndef NDEBUG
extern cl::opt<bool> ViewMISchedDAGs;
extern cl::opt<bool> PrintDAGs;
#else
extern const bool ViewMISchedDAGs;
extern const bool PrintDAGs;
#endif

class AAResults;
class LiveIntervals;
class MachineDominatorTree;
class MachineFunction;
class MachineInstr;
class MachineLoopInfo;
class RegisterClassInfo;
class SchedDFSResult;
class ScheduleHazardRecognizer;
class TargetInstrInfo;
class TargetPassConfig;
class TargetRegisterInfo;

/// MachineSchedContext provides enough context from the MachineScheduler pass
/// for the target to instantiate a scheduler.
struct MachineSchedContext {
  MachineFunction *MF = nullptr;
  const MachineLoopInfo *MLI = nullptr;
  const MachineDominatorTree *MDT = nullptr;
  const TargetPassConfig *PassConfig = nullptr;
  AAResults *AA = nullptr;
  LiveIntervals *LIS = nullptr;

  RegisterClassInfo *RegClassInfo;

  MachineSchedContext();
  MachineSchedContext &operator=(const MachineSchedContext &other) = delete;
  MachineSchedContext(const MachineSchedContext &other) = delete;
  virtual ~MachineSchedContext();
};

/// MachineSchedRegistry provides a selection of available machine instruction
/// schedulers.
class MachineSchedRegistry
    : public MachinePassRegistryNode<
          ScheduleDAGInstrs *(*)(MachineSchedContext *)> {
public:
  using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *);

  // RegisterPassParser requires a (misnamed) FunctionPassCtor type.
  using FunctionPassCtor = ScheduleDAGCtor;

  static MachinePassRegistry<ScheduleDAGCtor> Registry;

  MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
      : MachinePassRegistryNode(N, D, C) {
    Registry.Add(this);
  }

  ~MachineSchedRegistry() { Registry.Remove(this); }

  // Accessors.
  //
  MachineSchedRegistry *getNext() const {
    return (MachineSchedRegistry *)MachinePassRegistryNode::getNext();
  }

  static MachineSchedRegistry *getList() {
    return (MachineSchedRegistry *)Registry.getList();
  }

  static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) {
    Registry.setListener(L);
  }
};

class ScheduleDAGMI;

/// Define a generic scheduling policy for targets that don't provide their own
/// MachineSchedStrategy. This can be overridden for each scheduling region
/// before building the DAG.
struct MachineSchedPolicy {
  // Allow the scheduler to disable register pressure tracking.
  bool ShouldTrackPressure = false;
  /// Track LaneMasks to allow reordering of independent subregister writes
  /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks()
  bool ShouldTrackLaneMasks = false;

  // Allow the scheduler to force top-down or bottom-up scheduling. If neither
  // is true, the scheduler runs in both directions and converges.
  bool OnlyTopDown = false;
  bool OnlyBottomUp = false;

  // Disable heuristic that tries to fetch nodes from long dependency chains
  // first.
  bool DisableLatencyHeuristic = false;

  // Compute DFSResult for use in scheduling heuristics.
  bool ComputeDFSResult = false;

  MachineSchedPolicy() = default;
};

/// MachineSchedStrategy - Interface to the scheduling algorithm used by
/// ScheduleDAGMI.
///
/// Initialization sequence:
///   initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots
class MachineSchedStrategy {
  virtual void anchor();

public:
  virtual ~MachineSchedStrategy() = default;

  /// Optionally override the per-region scheduling policy.
  virtual void initPolicy(MachineBasicBlock::iterator Begin,
                          MachineBasicBlock::iterator End,
                          unsigned NumRegionInstrs) {}

  virtual void dumpPolicy() const {}

  /// Check if pressure tracking is needed before building the DAG and
  /// initializing this strategy. Called after initPolicy.
  virtual bool shouldTrackPressure() const { return true; }

  /// Returns true if lanemasks should be tracked. LaneMask tracking is
  /// necessary to reorder independent subregister defs for the same vreg.
  /// This has to be enabled in combination with shouldTrackPressure().
  virtual bool shouldTrackLaneMasks() const { return false; }

  // If this method returns true, handling of the scheduling regions
  // themselves (in case of a scheduling boundary in MBB) will be done
  // beginning with the topmost region of MBB.
  virtual bool doMBBSchedRegionsTopDown() const { return false; }

  /// Initialize the strategy after building the DAG for a new region.
  virtual void initialize(ScheduleDAGMI *DAG) = 0;

  /// Tell the strategy that MBB is about to be processed.
  virtual void enterMBB(MachineBasicBlock *MBB) {}

  /// Tell the strategy that the current MBB is done.
  virtual void leaveMBB() {}

  /// Notify this strategy that all roots have been released (including those
  /// that depend on EntrySU or ExitSU).
  virtual void registerRoots() {}

  /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
  /// schedule the node at the top of the unscheduled region. Otherwise it will
  /// be scheduled at the bottom.
  virtual SUnit *pickNode(bool &IsTopNode) = 0;

  /// Scheduler callback to notify that a new subtree is scheduled.
  virtual void scheduleTree(unsigned SubtreeID) {}

  /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an
  /// instruction and updated scheduled/remaining flags in the DAG nodes.
  virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;

  /// When all predecessor dependencies have been resolved, free this node for
  /// top-down scheduling.
  virtual void releaseTopNode(SUnit *SU) = 0;

  /// When all successor dependencies have been resolved, free this node for
  /// bottom-up scheduling.
  virtual void releaseBottomNode(SUnit *SU) = 0;
};
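
// For illustration only: a minimal top-down strategy might look like the
// sketch below. The class name and its plain ready vector are hypothetical
// and not part of this interface; they merely show how the pure virtual
// callbacks fit together.
//
//   class InOrderStrategy : public MachineSchedStrategy {
//     std::vector<SUnit *> Ready;
//   public:
//     void initialize(ScheduleDAGMI *DAG) override { Ready.clear(); }
//     SUnit *pickNode(bool &IsTopNode) override {
//       if (Ready.empty())
//         return nullptr;                  // region is fully scheduled
//       IsTopNode = true;                  // always extend the top zone
//       auto It = std::min_element(Ready.begin(), Ready.end(),
//                                  [](const SUnit *A, const SUnit *B) {
//                                    return A->NodeNum < B->NodeNum;
//                                  });      // lowest original position first
//       SUnit *SU = *It;
//       Ready.erase(It);
//       return SU;
//     }
//     void schedNode(SUnit *SU, bool IsTopNode) override {}
//     void releaseTopNode(SUnit *SU) override { Ready.push_back(SU); }
//     void releaseBottomNode(SUnit *SU) override {} // top-down only
//   };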

/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply
/// schedules machine instructions according to the given MachineSchedStrategy
/// without much extra book-keeping. This is the common functionality between
/// PreRA and PostRA MachineScheduler.
class ScheduleDAGMI : public ScheduleDAGInstrs {
protected:
  AAResults *AA;
  LiveIntervals *LIS;
  std::unique_ptr<MachineSchedStrategy> SchedImpl;

  /// Ordered list of DAG postprocessing steps.
  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;

  /// The top of the unscheduled zone.
  MachineBasicBlock::iterator CurrentTop;

  /// The bottom of the unscheduled zone.
  MachineBasicBlock::iterator CurrentBottom;

  /// Record the next node in a scheduled cluster.
  const SUnit *NextClusterPred = nullptr;
  const SUnit *NextClusterSucc = nullptr;

#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  /// The number of instructions scheduled so far. Used to cut off the
  /// scheduler at the point determined by misched-cutoff.
  unsigned NumInstrsScheduled = 0;
#endif

public:
  ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
                bool RemoveKillFlags)
      : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA),
        LIS(C->LIS), SchedImpl(std::move(S)) {}

  // Provide a vtable anchor.
  ~ScheduleDAGMI() override;

  /// If this method returns true, handling of the scheduling regions
  /// themselves (in case of a scheduling boundary in MBB) will be done
  /// beginning with the topmost region of MBB.
  bool doMBBSchedRegionsTopDown() const override {
    return SchedImpl->doMBBSchedRegionsTopDown();
  }

  // Returns the LiveIntervals instance for use in DAG mutators and such.
  LiveIntervals *getLIS() const { return LIS; }

  /// Return true if this DAG supports VReg liveness and RegPressure.
  virtual bool hasVRegLiveness() const { return false; }

  /// Add a postprocessing step to the DAG builder.
  /// Mutations are applied in the order that they are added after normal DAG
  /// building and before MachineSchedStrategy initialization.
  ///
  /// ScheduleDAGMI takes ownership of the Mutation object.
  void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
    if (Mutation)
      Mutations.push_back(std::move(Mutation));
  }

  MachineBasicBlock::iterator top() const { return CurrentTop; }
  MachineBasicBlock::iterator bottom() const { return CurrentBottom; }

  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
  /// region. This covers all instructions in a block, while schedule() may only
  /// cover a subset.
  void enterRegion(MachineBasicBlock *bb,
                   MachineBasicBlock::iterator begin,
                   MachineBasicBlock::iterator end,
                   unsigned regioninstrs) override;

  /// Implement the ScheduleDAGInstrs interface for scheduling a sequence of
  /// reorderable instructions.
  void schedule() override;

  void startBlock(MachineBasicBlock *bb) override;
  void finishBlock() override;

  /// Change the position of an instruction within the basic block and update
  /// live ranges and region boundary iterators.
  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);

  const SUnit *getNextClusterPred() const { return NextClusterPred; }

  const SUnit *getNextClusterSucc() const { return NextClusterSucc; }

  void viewGraph(const Twine &Name, const Twine &Title) override;
  void viewGraph() override;

protected:
  // Top-level entry points for the schedule() driver...

  /// Apply each ScheduleDAGMutation step in order. This allows different
  /// instances of ScheduleDAGMI to perform custom DAG postprocessing.
  void postProcessDAG();

  /// Release ExitSU predecessors and set up scheduler queues.
  void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);

  /// Update the scheduler DAG and queues after scheduling an instruction.
  void updateQueues(SUnit *SU, bool IsTopNode);

  /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
  void placeDebugValues();

  /// Dump the scheduled Sequence.
  void dumpSchedule() const;
  /// Print the execution trace of the schedule top-down or bottom-up.
  void dumpScheduleTraceTopDown() const;
  void dumpScheduleTraceBottomUp() const;

  // Lesser helpers...
  bool checkSchedLimit();

  void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                             SmallVectorImpl<SUnit*> &BotRoots);

  void releaseSucc(SUnit *SU, SDep *SuccEdge);
  void releaseSuccessors(SUnit *SU);
  void releasePred(SUnit *SU, SDep *PredEdge);
  void releasePredecessors(SUnit *SU);
};
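
// Because addMutation() takes ownership through std::unique_ptr, a target
// hook would typically create mutations with std::make_unique, e.g.
// (CustomDAGMutation is a hypothetical target-defined mutation class):
//
//   ScheduleDAGInstrs *<Target>PassConfig::
//   createMachineScheduler(MachineSchedContext *C) {
//     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//     DAG->addMutation(std::make_unique<CustomDAGMutation>());
//     return DAG;
//   }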

/// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules
/// machine instructions while updating LiveIntervals and tracking regpressure.
class ScheduleDAGMILive : public ScheduleDAGMI {
protected:
  RegisterClassInfo *RegClassInfo;

  /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees
  /// will be empty.
  SchedDFSResult *DFSResult = nullptr;
  BitVector ScheduledTrees;

  MachineBasicBlock::iterator LiveRegionEnd;

  /// Maps vregs to the SUnits of their uses in the current scheduling region.
  VReg2SUnitMultiMap VRegUses;

  // Map each SU to its summary of pressure changes. This array is updated for
  // liveness during bottom-up scheduling. Top-down scheduling may proceed but
  // has no effect on the pressure diffs.
  PressureDiffs SUPressureDiffs;

  /// Register pressure in this region computed by initRegPressure.
  bool ShouldTrackPressure = false;
  bool ShouldTrackLaneMasks = false;
  IntervalPressure RegPressure;
  RegPressureTracker RPTracker;

  /// List of pressure sets that exceed the target's pressure limit before
  /// scheduling, listed in increasing set ID order. Each pressure set is paired
  /// with its max pressure in the currently scheduled regions.
  std::vector<PressureChange> RegionCriticalPSets;

  /// The top of the unscheduled zone.
  IntervalPressure TopPressure;
  RegPressureTracker TopRPTracker;

  /// The bottom of the unscheduled zone.
  IntervalPressure BotPressure;
  RegPressureTracker BotRPTracker;

public:
  ScheduleDAGMILive(MachineSchedContext *C,
                    std::unique_ptr<MachineSchedStrategy> S)
      : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false),
        RegClassInfo(C->RegClassInfo), RPTracker(RegPressure),
        TopRPTracker(TopPressure), BotRPTracker(BotPressure) {}

  ~ScheduleDAGMILive() override;

  /// Return true if this DAG supports VReg liveness and RegPressure.
  bool hasVRegLiveness() const override { return true; }

  /// Return true if register pressure tracking is enabled.
  bool isTrackingPressure() const { return ShouldTrackPressure; }

  /// Get current register pressure for the top scheduled instructions.
  const IntervalPressure &getTopPressure() const { return TopPressure; }
  const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }

  /// Get current register pressure for the bottom scheduled instructions.
  const IntervalPressure &getBotPressure() const { return BotPressure; }
  const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }

  /// Get register pressure for the entire scheduling region before scheduling.
  const IntervalPressure &getRegPressure() const { return RegPressure; }

  const std::vector<PressureChange> &getRegionCriticalPSets() const {
    return RegionCriticalPSets;
  }

  PressureDiff &getPressureDiff(const SUnit *SU) {
    return SUPressureDiffs[SU->NodeNum];
  }
  const PressureDiff &getPressureDiff(const SUnit *SU) const {
    return SUPressureDiffs[SU->NodeNum];
  }

  /// Compute a DFSResult after DAG building is complete, and before any
  /// queue comparisons.
  void computeDFSResult();

  /// Return a non-null DFS result if the scheduling strategy initialized it.
  const SchedDFSResult *getDFSResult() const { return DFSResult; }

  BitVector &getScheduledTrees() { return ScheduledTrees; }

  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
  /// region. This covers all instructions in a block, while schedule() may only
  /// cover a subset.
  void enterRegion(MachineBasicBlock *bb,
                   MachineBasicBlock::iterator begin,
                   MachineBasicBlock::iterator end,
                   unsigned regioninstrs) override;

  /// Implement the ScheduleDAGInstrs interface for scheduling a sequence of
  /// reorderable instructions.
  void schedule() override;

  /// Compute the cyclic critical path through the DAG.
  unsigned computeCyclicCriticalPath();

  void dump() const override;

protected:
  // Top-level entry points for the schedule() driver...

  /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
  /// enabled. This sets up three trackers. RPTracker will cover the entire DAG
  /// region, TopTracker and BottomTracker will be initialized to the top and
  /// bottom of the DAG region without covering any unscheduled instruction.
  void buildDAGWithRegPressure();

  /// Release ExitSU predecessors and set up scheduler queues. Re-position
  /// the Top RP tracker in case the region beginning has changed.
  void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);

  /// Move an instruction and update register pressure.
  void scheduleMI(SUnit *SU, bool IsTopNode);

  // Lesser helpers...

  void initRegPressure();

  void updatePressureDiffs(ArrayRef<RegisterMaskPair> LiveUses);

  void updateScheduledPressure(const SUnit *SU,
                               const std::vector<unsigned> &NewMaxPressure);

  void collectVRegUses(SUnit &SU);
};
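
// A pressure-aware strategy can reach these trackers once it knows its DAG
// supports vreg liveness. Illustrative sketch only (MyStrategy is
// hypothetical):
//
//   void MyStrategy::initialize(ScheduleDAGMI *DAG) {
//     if (DAG->hasVRegLiveness()) {
//       auto *LiveDAG = static_cast<ScheduleDAGMILive *>(DAG);
//       const RegPressureTracker &RPT = LiveDAG->getTopRPTracker();
//       // e.g. inspect RPT.getRegSetPressureAtPos() to bias candidate picks.
//     }
//   }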

//===----------------------------------------------------------------------===//
///
/// Helpers for implementing custom MachineSchedStrategy classes. These take
/// care of the book-keeping associated with list scheduling heuristics.
///
//===----------------------------------------------------------------------===//

/// ReadyQueue encapsulates a vector of "ready" SUnits with basic convenience
/// methods for pushing and removing nodes. ReadyQueues are uniquely identified
/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
///
/// This is a convenience class that may be used by implementations of
/// MachineSchedStrategy.
class ReadyQueue {
  unsigned ID;
  std::string Name;
  std::vector<SUnit*> Queue;

public:
  ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}

  unsigned getID() const { return ID; }

  StringRef getName() const { return Name; }

  // SU is in this queue if its NodeQueueId is a superset of this ID.
  bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }

  bool empty() const { return Queue.empty(); }

  void clear() { Queue.clear(); }

  unsigned size() const { return Queue.size(); }

  using iterator = std::vector<SUnit*>::iterator;

  iterator begin() { return Queue.begin(); }

  iterator end() { return Queue.end(); }

  ArrayRef<SUnit*> elements() { return Queue; }

  iterator find(SUnit *SU) { return llvm::find(Queue, SU); }

  void push(SUnit *SU) {
    Queue.push_back(SU);
    SU->NodeQueueId |= ID;
  }

  iterator remove(iterator I) {
    (*I)->NodeQueueId &= ~ID;
    *I = Queue.back();
    unsigned idx = I - Queue.begin();
    Queue.pop_back();
    return Queue.begin() + idx;
  }

  void dump() const;
};
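
// Typical use in a strategy (illustrative; SU and Chosen are placeholder
// pointers): push nodes as they are released, then remove the chosen node
// while iterating. Note that remove() swaps the last element into the removed
// slot, so the returned iterator must be reused rather than incremented.
//
//   ReadyQueue Available(/*id=*/1, "MyQ");
//   Available.push(SU);              // sets the queue bit in SU->NodeQueueId
//   for (auto I = Available.begin(); I != Available.end();) {
//     if (*I == Chosen)
//       I = Available.remove(I);     // swap-with-back removal
//     else
//       ++I;
//   }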

/// Summarize the unscheduled region.
struct SchedRemainder {
  // Critical path through the DAG in expected latency.
  unsigned CriticalPath;
  unsigned CyclicCritPath;

  // Scaled count of micro-ops left to schedule.
  unsigned RemIssueCount;

  bool IsAcyclicLatencyLimited;

  // Unscheduled resources.
  SmallVector<unsigned, 16> RemainingCounts;

  SchedRemainder() { reset(); }

  void reset() {
    CriticalPath = 0;
    CyclicCritPath = 0;
    RemIssueCount = 0;
    IsAcyclicLatencyLimited = false;
    RemainingCounts.clear();
  }

  void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
};

/// ResourceSegments are a collection of intervals closed on the
/// left and open on the right:
///
///   list{ [a1, b1), [a2, b2), ..., [a_N, b_N) }
///
/// The collection has the following properties:
///
/// 1. The list is ordered: a_i < b_i and b_i < a_(i+1).
///
/// 2. The intervals in the collection do not intersect each other.
///
/// A \ref ResourceSegments instance represents the cycle
/// reservation history of an individual resource instance.
class ResourceSegments {
public:
  /// Represents an interval of discrete integer values closed on
  /// the left and open on the right: [a, b).
  typedef std::pair<int64_t, int64_t> IntervalTy;

  /// Adds an interval [a, b) to the collection of the instance.
  ///
  /// When adding [a, b) to the collection, the operation merges the
  /// adjacent intervals. For example
  ///
  ///       0  1  2  3  4  5  6  7  8  9  10
  ///       [-----)  [--)     [--)
  ///     +       [--)
  ///     = [-----------)     [--)
  ///
  /// To be able to debug duplicate resource usage, the function
  /// asserts that no interval is added if it overlaps any of the
  /// intervals in the collection. We can require this because by
  /// definition a \ref ResourceSegments is attached only to an
  /// individual resource instance.
  void add(IntervalTy A, const unsigned CutOff = 10);

public:
  /// Checks whether intervals intersect.
  static bool intersects(IntervalTy A, IntervalTy B);

  /// These functions return the interval used by a resource in bottom and top
  /// scheduling.
  ///
  /// Consider an instruction that uses resources X0, X1 and X2 as follows:
  ///
  ///   +--------+--------------+--------------+
  ///   |Resource|AcquireAtCycle|ReleaseAtCycle|
  ///   +--------+--------------+--------------+
  ///   |   X0   |      0       |      1       |
  ///   +--------+--------------+--------------+
  ///   |   X1   |      1       |      3       |
  ///   +--------+--------------+--------------+
  ///   |   X2   |      3       |      4       |
  ///   +--------+--------------+--------------+
  ///
  /// If we can schedule the instruction at cycle C, we need to
  /// compute the interval of the resource as follows:
  ///
  /// # TOP DOWN SCHEDULING
  ///
  /// Cycle scheduling flows to the _right_, in the same direction
  /// as time:
  ///
  ///       C      1      2      3      4      5  ...
  /// ------|------|------|------|------|------|----->
  ///       X0     X1     X1     X2               ---> direction of time
  ///
  ///   X0 [C, C+1)
  ///   X1 [C+1, C+3)
  ///   X2 [C+3, C+4)
  ///
  /// Therefore, the formula to compute the interval for a resource
  /// of an instruction that can be scheduled at cycle C in top-down
  /// scheduling is:
  ///
  ///   [C+AcquireAtCycle, C+ReleaseAtCycle)
  ///
  /// # BOTTOM UP SCHEDULING
  ///
  /// Cycle scheduling flows to the _left_, in the opposite direction
  /// of time.
  ///
  /// In bottom-up scheduling, scheduling happens in the opposite
  /// direction to the execution of the cycles of the
  /// instruction. When the instruction is scheduled at cycle `C`,
  /// the resources are allocated in the past relative to `C`:
  ///
  ///       2      1      C     -1     -2     -3     -4     -5  ...
  /// <-----|------|------|------|------|------|------|------|---
  ///                     X0     X1     X1     X2  ---> direction of time
  ///
  ///   X0 (C+1, C]
  ///   X1 (C, C-2]
  ///   X2 (C-2, C-3]
  ///
  /// Therefore, the formula to compute the interval for a resource
  /// of an instruction that can be scheduled at cycle C in bottom-up
  /// scheduling is:
  ///
  ///   [C-ReleaseAtCycle+1, C-AcquireAtCycle+1)
  ///
  /// NOTE: In both cases, the number of cycles booked by a
  /// resource is the value (ReleaseAtCycle - AcquireAtCycle).
  static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle,
                                              unsigned ReleaseAtCycle) {
    return std::make_pair<long, long>((long)C - (long)ReleaseAtCycle + 1L,
                                      (long)C - (long)AcquireAtCycle + 1L);
  }
  static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle,
                                           unsigned ReleaseAtCycle) {
    return std::make_pair<long, long>((long)C + (long)AcquireAtCycle,
                                      (long)C + (long)ReleaseAtCycle);
  }

private:
  /// Finds the first cycle in which a resource can be allocated.
  ///
  /// The function uses the \param IntervalBuilder [*] to build a
  /// resource interval [a, b) out of the input parameters \param
  /// CurrCycle, \param AcquireAtCycle and \param ReleaseAtCycle.
  ///
  /// The function then loops through the intervals in the ResourceSegments
  /// and shifts the interval [a, b) and the ReturnCycle to the
  /// right until there is no intersection between the intervals of
  /// the \ref ResourceSegments instance and the new shifted [a, b). When
  /// this condition is met, the ReturnCycle (which
  /// corresponds to the cycle in which the resource can be
  /// allocated) is returned.
  ///
  ///               c = CurrCycle in input
  ///
  ///      c  1  2  3  4  5  6  7  8  9  10 ...   ---> (time flow)
  ///      ResourceSegments...  [---)  [-------)  [-----------)
  ///      c  [1  3)  -> AcquireAtCycle=1, ReleaseAtCycle=3
  ///    ++c     [1  3)
  ///    ++c        [1  3)
  ///    ++c           [1  3)
  ///    ++c              [1  3)
  ///    ++c                 [1  3) ---> returns c incremented by 5 (c+5)
  ///
  /// Notice that for bottom-up scheduling the diagram is slightly
  /// different because the current cycle c is always on the right
  /// of the interval [a, b) (see \ref
  /// `getResourceIntervalBottom`). This is because the cycle
  /// increments for bottom-up scheduling move in the direction
  /// opposite to the direction of time:
  ///
  ///     --------> direction of time.
  ///     XXYZZZ    (resource usage)
  ///     --------> direction of top-down execution cycles.
  ///     <-------- direction of bottom-up execution cycles.
  ///
  /// Even though bottom-up scheduling moves against the flow of
  /// time, the algorithm used to find the first free slot in between
  /// intervals is the same as for top-down scheduling.
  ///
  /// [*] See \ref `getResourceIntervalTop` and
  /// \ref `getResourceIntervalBottom` to see how such resource intervals
  /// are built.
  unsigned getFirstAvailableAt(
      unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
      std::function<IntervalTy(unsigned, unsigned, unsigned)> IntervalBuilder)
      const;

public:
  /// getFirstAvailableAtFromBottom and getFirstAvailableAtFromTop
  /// should be merged into a single function in which the function that
  /// creates the `NewInterval` is passed as a parameter.
  unsigned getFirstAvailableAtFromBottom(unsigned CurrCycle,
                                         unsigned AcquireAtCycle,
                                         unsigned ReleaseAtCycle) const {
    return getFirstAvailableAt(CurrCycle, AcquireAtCycle, ReleaseAtCycle,
                               getResourceIntervalBottom);
  }
  unsigned getFirstAvailableAtFromTop(unsigned CurrCycle,
                                      unsigned AcquireAtCycle,
                                      unsigned ReleaseAtCycle) const {
    return getFirstAvailableAt(CurrCycle, AcquireAtCycle, ReleaseAtCycle,
                               getResourceIntervalTop);
  }

private:
  std::list<IntervalTy> _Intervals;
  /// Merge all adjacent intervals in the collection. For all pairs
  /// of adjacent intervals, it performs [a, b) + [b, c) -> [a, c).
  ///
  /// Before performing the merge operation, the intervals are
  /// sorted with \ref sort_predicate.
  void sortAndMerge();

public:
  // Constructor for an empty set.
  explicit ResourceSegments() {}
  bool empty() const { return _Intervals.empty(); }
  explicit ResourceSegments(const std::list<IntervalTy> &Intervals)
      : _Intervals(Intervals) {
    sortAndMerge();
  }

  friend bool operator==(const ResourceSegments &c1,
                         const ResourceSegments &c2) {
    return c1._Intervals == c2._Intervals;
  }
  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
                                       const ResourceSegments &Segments) {
    os << "{ ";
    for (auto p : Segments._Intervals)
      os << "[" << p.first << ", " << p.second << "), ";
    os << "}\n";
    return os;
  }
};
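
// Worked example (using the X0/X1/X2 table above, where X1 has
// AcquireAtCycle=1 and ReleaseAtCycle=3): an instruction scheduled top-down
// at cycle C=10 books X1 for
//
//   getResourceIntervalTop(10, 1, 3) == [11, 13)
//
// while the same instruction scheduled bottom-up at C=10 books X1 for
//
//   getResourceIntervalBottom(10, 1, 3) == [8, 10)
//
// In both cases the reservation spans ReleaseAtCycle - AcquireAtCycle = 2
// cycles.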

/// Each scheduling boundary is associated with ready queues. It tracks the
/// current cycle in the direction of movement, and maintains the state
/// of "hazards" and other interlocks at the current cycle.
class SchedBoundary {
public:
  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
  enum {
    TopQID = 1,
    BotQID = 2,
    LogMaxQID = 2
  };

  ScheduleDAGMI *DAG = nullptr;
  const TargetSchedModel *SchedModel = nullptr;
  SchedRemainder *Rem = nullptr;

  ReadyQueue Available;
  ReadyQueue Pending;

  ScheduleHazardRecognizer *HazardRec = nullptr;

private:
  /// True if the pending Q should be checked/updated before scheduling another
  /// instruction.
  bool CheckPending;

  /// Number of cycles it takes to issue the instructions scheduled in this
  /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
  /// See getStalls().
  unsigned CurrCycle;

  /// Micro-ops issued in the current cycle.
  unsigned CurrMOps;

  /// MinReadyCycle - Cycle of the soonest available instruction.
  unsigned MinReadyCycle;

  // The expected latency of the critical path in this scheduled zone.
  unsigned ExpectedLatency;

  // The latency of dependence chains leading into this zone.
  // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
  // For each cycle scheduled: DLat -= 1.
  unsigned DependentLatency;

  /// Count the scheduled (issued) micro-ops that can be retired by
  /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
  unsigned RetiredMOps;

  // Count scheduled resources that have been executed. Resources are
  // considered executed if they become ready in the time that it takes to
  // saturate any resource including the one in question. Counts are scaled
  // for direct comparison with other resources. Counts can be compared with
  // MOps * getMicroOpFactor and Latency * getLatencyFactor.
  SmallVector<unsigned, 16> ExecutedResCounts;

  /// Cache the max count for a single resource.
  unsigned MaxExecutedResCount;

  // Cache the critical resource's ID in this scheduled zone.
  unsigned ZoneCritResIdx;

  // Is the scheduled region resource limited vs. latency limited.
  bool IsResourceLimited;

  /// Record how resources have been allocated across the cycles of
  /// the execution.
  std::map<unsigned, ResourceSegments> ReservedResourceSegments;
  std::vector<unsigned> ReservedCycles;
  /// For each PIdx, stores the first index into ReservedResourceSegments that
  /// corresponds to it.
  ///
  /// For example, consider the following 3 resources (ResourceCount =
  /// 3):
  ///
  ///   +------------+--------+
  ///   |ResourceName|NumUnits|
  ///   +------------+--------+
  ///   |     X      |   2    |
  ///   +------------+--------+
  ///   |     Y      |   3    |
  ///   +------------+--------+
  ///   |     Z      |   1    |
  ///   +------------+--------+
  ///
  /// In this case, the total number of resource instances is 6. The
  /// vector \ref ReservedResourceSegments will have a slot for each instance.
  /// The vector \ref ReservedCyclesIndex will track at what index the first
  /// instance of the resource is found in the vector of \ref
  /// ReservedResourceSegments:
  ///
  ///        Indexes of instances in ReservedResourceSegments
  ///
  ///                                  0   1   2   3   4   5
  ///     ReservedCyclesIndex[0] = 0; [X0, X1,
  ///     ReservedCyclesIndex[1] = 2;          Y0, Y1, Y2,
  ///     ReservedCyclesIndex[2] = 5;                      Z]
  SmallVector<unsigned, 16> ReservedCyclesIndex;

  // For each PIdx, stores the resource group IDs of its subunits.
  SmallVector<APInt, 16> ResourceGroupSubUnitMasks;

#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  // Remember the greatest possible stall as an upper bound on the number of
  // times we should retry the pending queue because of a hazard.
  unsigned MaxObservedStall;
#endif

public:
  /// Pending queues extend the ready queues with the same ID and the
  /// PendingFlag set.
  SchedBoundary(unsigned ID, const Twine &Name):
    Available(ID, Name+".A"), Pending(ID << LogMaxQID, Name+".P") {
    reset();
  }
  SchedBoundary &operator=(const SchedBoundary &other) = delete;
  SchedBoundary(const SchedBoundary &other) = delete;
  ~SchedBoundary();

  void reset();

  void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
            SchedRemainder *rem);

  bool isTop() const {
    return Available.getID() == TopQID;
  }

  /// Number of cycles to issue the instructions scheduled in this zone.
  unsigned getCurrCycle() const { return CurrCycle; }

  /// Micro-ops issued in the current cycle.
  unsigned getCurrMOps() const { return CurrMOps; }

  // The latency of dependence chains leading into this zone.
  unsigned getDependentLatency() const { return DependentLatency; }

  /// Get the number of latency cycles "covered" by the scheduled
  /// instructions. This is the larger of the critical path within the zone
  /// and the number of cycles required to issue the instructions.
  unsigned getScheduledLatency() const {
    return std::max(ExpectedLatency, CurrCycle);
  }

  unsigned getUnscheduledLatency(SUnit *SU) const {
    return isTop() ? SU->getHeight() : SU->getDepth();
  }

  unsigned getResourceCount(unsigned ResIdx) const {
    return ExecutedResCounts[ResIdx];
  }

  /// Get the scaled count of scheduled micro-ops and resources, including
  /// executed resources.
  unsigned getCriticalCount() const {
    if (!ZoneCritResIdx)
      return RetiredMOps * SchedModel->getMicroOpFactor();
    return getResourceCount(ZoneCritResIdx);
  }

  /// Get a scaled count for the minimum execution time of the scheduled
  /// micro-ops that are ready to execute by getExecutedCount. Notice the
  /// feedback loop.
  unsigned getExecutedCount() const {
    return std::max(CurrCycle * SchedModel->getLatencyFactor(),
                    MaxExecutedResCount);
  }

  unsigned getZoneCritResIdx() const { return ZoneCritResIdx; }

  // Is the scheduled region resource limited vs. latency limited.
  bool isResourceLimited() const { return IsResourceLimited; }

  /// Get the difference between the given SUnit's ready time and the current
  /// cycle.
  unsigned getLatencyStallCycles(SUnit *SU);

  unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
                                          unsigned ReleaseAtCycle,
                                          unsigned AcquireAtCycle);

  std::pair<unsigned, unsigned> getNextResourceCycle(const MCSchedClassDesc *SC,
                                                     unsigned PIdx,
                                                     unsigned ReleaseAtCycle,
                                                     unsigned AcquireAtCycle);

  bool isUnbufferedGroup(unsigned PIdx) const {
    return SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin &&
           !SchedModel->getProcResource(PIdx)->BufferSize;
  }

  bool checkHazard(SUnit *SU);

  unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);

  unsigned getOtherResourceCount(unsigned &OtherCritIdx);

  /// Release SU to make it ready. If it has no hazard, remove it from the
  /// pending queue (if it is already in it) and push it into the available
  /// queue. Otherwise, push the SU into the pending queue.
  ///
  /// @param SU The unit to be released.
  /// @param ReadyCycle Until which cycle the unit is ready.
  /// @param InPQueue Whether SU is already in the pending queue.
  /// @param Idx Position offset in the pending queue (if in it).
  void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
                   unsigned Idx = 0);

  void bumpCycle(unsigned NextCycle);

  void incExecutedResources(unsigned PIdx, unsigned Count);

  unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx,
                         unsigned Cycles, unsigned ReadyCycle,
                         unsigned StartAtCycle);

  void bumpNode(SUnit *SU);

  void releasePending();

  void removeReady(SUnit *SU);

  /// Call this before applying any other heuristics to the Available queue.
  /// Updates the Available/Pending Q's if necessary and returns the single
  /// available instruction, or NULL if there are multiple candidates.
  SUnit *pickOnlyChoice();

  /// Dump the state of the information that tracks resource usage.
  void dumpReservedCycles() const;
  void dumpScheduledState() const;
};
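
// In a strategy's pickNode(), a boundary is typically consulted for a forced
// pick before any heuristic comparison. Illustrative sketch only (MyStrategy
// and its Top boundary member are hypothetical):
//
//   SUnit *MyStrategy::pickNode(bool &IsTopNode) {
//     if (SUnit *SU = Top.pickOnlyChoice()) { // single candidate: no choice
//       IsTopNode = true;
//       return SU;
//     }
//     // Otherwise compare candidates, e.g. penalizing nodes for which
//     // Top.checkHazard(SU) or Top.getLatencyStallCycles(SU) indicates a
//     // stall, and schedule the best one.
//     ...
//   }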

/// Base class for GenericScheduler. This class maintains information about
/// scheduling candidates based on TargetSchedModel making it easy to implement
/// heuristics for either preRA or postRA scheduling.
class GenericSchedulerBase : public MachineSchedStrategy {
public:
  /// Represent the type of SchedCandidate found within a single queue.
  /// pickNodeBidirectional depends on these listed by decreasing priority.
  enum CandReason : uint8_t {
    NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak,
    RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
    TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};

#ifndef NDEBUG
  static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
#endif

  /// Policy for scheduling the next instruction in the candidate's zone.
  struct CandPolicy {
    bool ReduceLatency = false;
    unsigned ReduceResIdx = 0;
    unsigned DemandResIdx = 0;

    CandPolicy() = default;

    bool operator==(const CandPolicy &RHS) const {
      return ReduceLatency == RHS.ReduceLatency &&
             ReduceResIdx == RHS.ReduceResIdx &&
             DemandResIdx == RHS.DemandResIdx;
    }
    bool operator!=(const CandPolicy &RHS) const {
      return !(*this == RHS);
    }
  };

  /// Status of an instruction's critical resource consumption.
  struct SchedResourceDelta {
    // Count critical resources in the scheduled region required by SU.
    unsigned CritResources = 0;

    // Count critical resources from another region consumed by SU.
    unsigned DemandedResources = 0;

    SchedResourceDelta() = default;

    bool operator==(const SchedResourceDelta &RHS) const {
      return CritResources == RHS.CritResources
          && DemandedResources == RHS.DemandedResources;
    }
    bool operator!=(const SchedResourceDelta &RHS) const {
      return !operator==(RHS);
    }
  };

  /// Store the state used by GenericScheduler heuristics, required for the
  /// lifetime of one invocation of pickNode().
  struct SchedCandidate {
    CandPolicy Policy;

    // The best SUnit candidate.
    SUnit *SU;

    // The reason for this candidate.
    CandReason Reason;

    // Whether this candidate should be scheduled at top/bottom.
    bool AtTop;

    // Register pressure values for the best candidate.
    RegPressureDelta RPDelta;

    // Critical resource consumption of the best candidate.
    SchedResourceDelta ResDelta;

    SchedCandidate() { reset(CandPolicy()); }
    SchedCandidate(const CandPolicy &Policy) { reset(Policy); }

    void reset(const CandPolicy &NewPolicy) {
      Policy = NewPolicy;
      SU = nullptr;
      Reason = NoCand;
      AtTop = false;
      RPDelta = RegPressureDelta();
      ResDelta = SchedResourceDelta();
    }

    bool isValid() const { return SU; }

    // Copy the status of another candidate without changing policy.
    void setBest(SchedCandidate &Best) {
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
      SU = Best.SU;
      Reason = Best.Reason;
      AtTop = Best.AtTop;
      RPDelta = Best.RPDelta;
      ResDelta = Best.ResDelta;
    }

    void initResourceDelta(const ScheduleDAGMI *DAG,
                           const TargetSchedModel *SchedModel);
  };

protected:
  const MachineSchedContext *Context;
  const TargetSchedModel *SchedModel = nullptr;
  const TargetRegisterInfo *TRI = nullptr;

  SchedRemainder Rem;

  GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {}

  void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
                 SchedBoundary *OtherZone);

#ifndef NDEBUG
  void traceCandidate(const SchedCandidate &Cand);
#endif

private:
  bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone,
                           bool ComputeRemLatency, unsigned &RemLatency) const;
};

// Utility functions used by heuristics in tryCandidate().
bool tryLess(int TryVal, int CandVal,
             GenericSchedulerBase::SchedCandidate &TryCand,
             GenericSchedulerBase::SchedCandidate &Cand,
             GenericSchedulerBase::CandReason Reason);
bool tryGreater(int TryVal, int CandVal,
                GenericSchedulerBase::SchedCandidate &TryCand,
                GenericSchedulerBase::SchedCandidate &Cand,
                GenericSchedulerBase::CandReason Reason);
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
                GenericSchedulerBase::SchedCandidate &Cand,
                SchedBoundary &Zone);
bool tryPressure(const PressureChange &TryP,
                 const PressureChange &CandP,
                 GenericSchedulerBase::SchedCandidate &TryCand,
                 GenericSchedulerBase::SchedCandidate &Cand,
                 GenericSchedulerBase::CandReason Reason,
                 const TargetRegisterInfo *TRI,
                 const MachineFunction &MF);
unsigned getWeakLeft(const SUnit *SU, bool isTop);
int biasPhysReg(const SUnit *SU, bool isTop);

/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class GenericScheduler : public GenericSchedulerBase {
public:
  GenericScheduler(const MachineSchedContext *C):
    GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
    Bot(SchedBoundary::BotQID, "BotQ") {}

  void initPolicy(MachineBasicBlock::iterator Begin,
                  MachineBasicBlock::iterator End,
                  unsigned NumRegionInstrs) override;

  void dumpPolicy() const override;

  bool shouldTrackPressure() const override {
    return RegionPolicy.ShouldTrackPressure;
  }

  bool shouldTrackLaneMasks() const override {
    return RegionPolicy.ShouldTrackLaneMasks;
  }

  void initialize(ScheduleDAGMI *dag) override;

  SUnit *pickNode(bool &IsTopNode) override;

  void schedNode(SUnit *SU, bool IsTopNode) override;

  void releaseTopNode(SUnit *SU) override {
    if (SU->isScheduled)
      return;

    Top.releaseNode(SU, SU->TopReadyCycle, false);
    TopCand.SU = nullptr;
  }

  void releaseBottomNode(SUnit *SU) override {
    if (SU->isScheduled)
      return;

    Bot.releaseNode(SU, SU->BotReadyCycle, false);
    BotCand.SU = nullptr;
  }

  void registerRoots() override;

protected:
  ScheduleDAGMILive *DAG = nullptr;

  MachineSchedPolicy RegionPolicy;

  // State of the top and bottom scheduled instruction boundaries.
  SchedBoundary Top;
  SchedBoundary Bot;

  /// Candidate last picked from Top boundary.
  SchedCandidate TopCand;
  /// Candidate last picked from Bot boundary.
  SchedCandidate BotCand;

  void checkAcyclicLatency();

  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     RegPressureTracker &TempTracker);

  virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                            SchedBoundary *Zone) const;

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone,
                         const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Candidate);

  void reschedulePhysReg(SUnit *SU, bool isTop);
};
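
// Targets commonly reuse GenericScheduler and override only tryCandidate()
// to layer one extra heuristic on top of the generic ones. Illustrative
// sketch (MyScheduler and the target-specific test are hypothetical):
//
//   class MyScheduler : public GenericScheduler {
//   public:
//     using GenericScheduler::GenericScheduler;
//   protected:
//     bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
//                       SchedBoundary *Zone) const override {
//       // Apply a target-specific preference first; if it does not decide,
//       // defer to the generic heuristics.
//       if (/*target-specific test decides in favor of TryCand*/ false) {
//         TryCand.Reason = NodeOrder;
//         return true;
//       }
//       return GenericScheduler::tryCandidate(Cand, TryCand, Zone);
//     }
//   };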

/// PostGenericScheduler - Interface to the scheduling algorithm used by
/// ScheduleDAGMI.
///
/// Callbacks from ScheduleDAGMI:
///   initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
class PostGenericScheduler : public GenericSchedulerBase {
protected:
  ScheduleDAGMI *DAG = nullptr;
  SchedBoundary Top;
  SchedBoundary Bot;
  MachineSchedPolicy RegionPolicy;

  /// Candidate last picked from Top boundary.
  SchedCandidate TopCand;
  /// Candidate last picked from Bot boundary.
  SchedCandidate BotCand;

public:
  PostGenericScheduler(const MachineSchedContext *C)
      : GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
        Bot(SchedBoundary::BotQID, "BotQ") {}

  ~PostGenericScheduler() override = default;

  void initPolicy(MachineBasicBlock::iterator Begin,
                  MachineBasicBlock::iterator End,
                  unsigned NumRegionInstrs) override;

  /// PostRA scheduling does not track pressure.
  bool shouldTrackPressure() const override { return false; }

  void initialize(ScheduleDAGMI *Dag) override;

  void registerRoots() override;

  SUnit *pickNode(bool &IsTopNode) override;

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void scheduleTree(unsigned SubtreeID) override {
    llvm_unreachable("PostRA scheduler does not support subtree analysis.");
  }

  void schedNode(SUnit *SU, bool IsTopNode) override;

  void releaseTopNode(SUnit *SU) override {
    if (SU->isScheduled)
      return;
    Top.releaseNode(SU, SU->TopReadyCycle, false);
    TopCand.SU = nullptr;
  }

  void releaseBottomNode(SUnit *SU) override {
    if (SU->isScheduled)
      return;
    Bot.releaseNode(SU, SU->BotReadyCycle, false);
    BotCand.SU = nullptr;
  }

protected:
  virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);

  void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand);
};

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
/// Adds default DAG mutations.
ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C);

/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C);

/// If ReorderWhileClustering is set to true, no attempt will be made to
/// reduce reordering due to load clustering.
std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
                             const TargetRegisterInfo *TRI,
                             bool ReorderWhileClustering = false);

/// If ReorderWhileClustering is set to true, no attempt will be made to
/// reduce reordering due to store clustering.
std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
                              const TargetRegisterInfo *TRI,
                              bool ReorderWhileClustering = false);

std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
                               const TargetRegisterInfo *TRI);

} // end namespace llvm

#endif // LLVM_CODEGEN_MACHINESCHEDULER_H