1 //===----- SchedulePostRAList.cpp - list scheduler ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This implements a top-down list scheduler, using standard algorithms. 10 // The basic approach uses a priority queue of available nodes to schedule. 11 // One at a time, nodes are taken from the priority queue (thus in priority 12 // order), checked for legality to schedule, and emitted if legal. 13 // 14 // Nodes may not be legal to schedule either due to structural hazards (e.g. 15 // pipeline or resource constraints) or because an input to the instruction has 16 // not completed execution. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "llvm/CodeGen/PostRASchedulerList.h" 21 #include "llvm/ADT/Statistic.h" 22 #include "llvm/Analysis/AliasAnalysis.h" 23 #include "llvm/CodeGen/AntiDepBreaker.h" 24 #include "llvm/CodeGen/LatencyPriorityQueue.h" 25 #include "llvm/CodeGen/MachineDominators.h" 26 #include "llvm/CodeGen/MachineFunctionPass.h" 27 #include "llvm/CodeGen/MachineLoopInfo.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/RegisterClassInfo.h" 30 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 31 #include "llvm/CodeGen/ScheduleDAGMutation.h" 32 #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 33 #include "llvm/CodeGen/TargetInstrInfo.h" 34 #include "llvm/CodeGen/TargetPassConfig.h" 35 #include "llvm/CodeGen/TargetSubtargetInfo.h" 36 #include "llvm/Config/llvm-config.h" 37 #include "llvm/InitializePasses.h" 38 #include "llvm/Pass.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/ErrorHandling.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include "llvm/Target/TargetMachine.h" 44 using namespace llvm; 45 46 #define DEBUG_TYPE "post-RA-sched" 47 48 STATISTIC(NumNoops, "Number of noops inserted"); 49 STATISTIC(NumStalls, "Number of pipeline stalls"); 50 STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies"); 51 52 // Post-RA scheduling is enabled with 53 // TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to 54 // override the target. 55 static cl::opt<bool> 56 EnablePostRAScheduler("post-RA-scheduler", 57 cl::desc("Enable scheduling after register allocation"), 58 cl::init(false), cl::Hidden); 59 static cl::opt<std::string> 60 EnableAntiDepBreaking("break-anti-dependencies", 61 cl::desc("Break post-RA scheduling anti-dependencies: " 62 "\"critical\", \"all\", or \"none\""), 63 cl::init("none"), cl::Hidden); 64 65 // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod 66 static cl::opt<int> 67 DebugDiv("postra-sched-debugdiv", 68 cl::desc("Debug control MBBs that are scheduled"), 69 cl::init(0), cl::Hidden); 70 static cl::opt<int> 71 DebugMod("postra-sched-debugmod", 72 cl::desc("Debug control MBBs that are scheduled"), 73 cl::init(0), cl::Hidden); 74 75 AntiDepBreaker::~AntiDepBreaker() = default; 76 77 namespace { 78 class PostRAScheduler { 79 const TargetInstrInfo *TII = nullptr; 80 MachineLoopInfo *MLI = nullptr; 81 AliasAnalysis *AA = nullptr; 82 const TargetMachine *TM = nullptr; 83 RegisterClassInfo RegClassInfo; 84 85 public: 86 PostRAScheduler(MachineFunction &MF, MachineLoopInfo *MLI, AliasAnalysis *AA, 87 const TargetMachine *TM) 88 : TII(MF.getSubtarget().getInstrInfo()), MLI(MLI), AA(AA), TM(TM) {} 89 bool run(MachineFunction &MF); 90 }; 91 92 class PostRASchedulerLegacy : public MachineFunctionPass { 93 public: 94 static char ID; 95 PostRASchedulerLegacy() : MachineFunctionPass(ID) {} 96 97 void getAnalysisUsage(AnalysisUsage &AU) const override { 98 AU.setPreservesCFG(); 99 AU.addRequired<AAResultsWrapperPass>(); 100 AU.addRequired<TargetPassConfig>(); 101 AU.addRequired<MachineDominatorTreeWrapperPass>(); 102 AU.addPreserved<MachineDominatorTreeWrapperPass>(); 103 AU.addRequired<MachineLoopInfoWrapperPass>(); 104 AU.addPreserved<MachineLoopInfoWrapperPass>(); 105 MachineFunctionPass::getAnalysisUsage(AU); 106 } 107 108 MachineFunctionProperties getRequiredProperties() const override { 109 return MachineFunctionProperties().setNoVRegs(); 110 } 111 112 bool runOnMachineFunction(MachineFunction &Fn) override; 113 }; 114 char PostRASchedulerLegacy::ID = 0; 115 116 class SchedulePostRATDList : public ScheduleDAGInstrs { 117 /// AvailableQueue - The priority queue to use for the available SUnits. 118 /// 119 LatencyPriorityQueue AvailableQueue; 120 121 /// PendingQueue - This contains all of the instructions whose operands have 122 /// been issued, but their results are not ready yet (due to the latency of 123 /// the operation). Once the operands becomes available, the instruction is 124 /// added to the AvailableQueue. 125 std::vector<SUnit *> PendingQueue; 126 127 /// HazardRec - The hazard recognizer to use. 128 ScheduleHazardRecognizer *HazardRec; 129 130 /// AntiDepBreak - Anti-dependence breaking object, or NULL if none 131 AntiDepBreaker *AntiDepBreak; 132 133 /// AA - AliasAnalysis for making memory reference queries. 134 AliasAnalysis *AA; 135 136 /// The schedule. Null SUnit*'s represent noop instructions. 137 std::vector<SUnit *> Sequence; 138 139 /// Ordered list of DAG postprocessing steps. 140 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; 141 142 /// The index in BB of RegionEnd. 143 /// 144 /// This is the instruction number from the top of the current block, not 145 /// the SlotIndex. It is only used by the AntiDepBreaker. 146 unsigned EndIndex = 0; 147 148 public: 149 SchedulePostRATDList( 150 MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, 151 const RegisterClassInfo &, 152 TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, 153 SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs); 154 155 ~SchedulePostRATDList() override; 156 157 /// startBlock - Initialize register live-range state for scheduling in 158 /// this block. 159 /// 160 void startBlock(MachineBasicBlock *BB) override; 161 162 // Set the index of RegionEnd within the current BB. 163 void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; } 164 165 /// Initialize the scheduler state for the next scheduling region. 166 void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, 167 MachineBasicBlock::iterator end, 168 unsigned regioninstrs) override; 169 170 /// Notify that the scheduler has finished scheduling the current region. 171 void exitRegion() override; 172 173 /// Schedule - Schedule the instruction range using list scheduling. 174 /// 175 void schedule() override; 176 177 void EmitSchedule(); 178 179 /// Observe - Update liveness information to account for the current 180 /// instruction, which will not be scheduled. 181 /// 182 void Observe(MachineInstr &MI, unsigned Count); 183 184 /// finishBlock - Clean up register live-range state. 185 /// 186 void finishBlock() override; 187 188 private: 189 /// Apply each ScheduleDAGMutation step in order. 190 void postProcessDAG(); 191 192 void ReleaseSucc(SUnit *SU, SDep *SuccEdge); 193 void ReleaseSuccessors(SUnit *SU); 194 void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); 195 void ListScheduleTopDown(); 196 197 void dumpSchedule() const; 198 void emitNoop(unsigned CurCycle); 199 }; 200 } // namespace 201 202 char &llvm::PostRASchedulerID = PostRASchedulerLegacy::ID; 203 204 INITIALIZE_PASS(PostRASchedulerLegacy, DEBUG_TYPE, 205 "Post RA top-down list latency scheduler", false, false) 206 207 SchedulePostRATDList::SchedulePostRATDList( 208 MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, 209 const RegisterClassInfo &RCI, 210 TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, 211 SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs) 212 : ScheduleDAGInstrs(MF, &MLI), AA(AA) { 213 214 const InstrItineraryData *InstrItins = 215 MF.getSubtarget().getInstrItineraryData(); 216 HazardRec = 217 MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer( 218 InstrItins, this); 219 MF.getSubtarget().getPostRAMutations(Mutations); 220 221 assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE || 222 MRI.tracksLiveness()) && 223 "Live-ins must be accurate for anti-dependency breaking"); 224 AntiDepBreak = ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) 225 ? createAggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) 226 : ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) 227 ? createCriticalAntiDepBreaker(MF, RCI) 228 : nullptr)); 229 } 230 231 SchedulePostRATDList::~SchedulePostRATDList() { 232 delete HazardRec; 233 delete AntiDepBreak; 234 } 235 236 /// Initialize state associated with the next scheduling region. 237 void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, 238 MachineBasicBlock::iterator begin, 239 MachineBasicBlock::iterator end, 240 unsigned regioninstrs) { 241 ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); 242 Sequence.clear(); 243 } 244 245 /// Print the schedule before exiting the region. 246 void SchedulePostRATDList::exitRegion() { 247 LLVM_DEBUG({ 248 dbgs() << "*** Final schedule ***\n"; 249 dumpSchedule(); 250 dbgs() << '\n'; 251 }); 252 ScheduleDAGInstrs::exitRegion(); 253 } 254 255 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 256 /// dumpSchedule - dump the scheduled Sequence. 257 LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const { 258 for (const SUnit *SU : Sequence) { 259 if (SU) 260 dumpNode(*SU); 261 else 262 dbgs() << "**** NOOP ****\n"; 263 } 264 } 265 #endif 266 267 static bool enablePostRAScheduler(const TargetSubtargetInfo &ST, 268 CodeGenOptLevel OptLevel) { 269 // Check for explicit enable/disable of post-ra scheduling. 270 if (EnablePostRAScheduler.getPosition() > 0) 271 return EnablePostRAScheduler; 272 273 return ST.enablePostRAScheduler() && 274 OptLevel >= ST.getOptLevelToEnablePostRAScheduler(); 275 } 276 277 bool PostRAScheduler::run(MachineFunction &MF) { 278 const auto &Subtarget = MF.getSubtarget(); 279 // Check that post-RA scheduling is enabled for this target. 280 if (!enablePostRAScheduler(Subtarget, TM->getOptLevel())) 281 return false; 282 283 TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = 284 Subtarget.getAntiDepBreakMode(); 285 if (EnableAntiDepBreaking.getPosition() > 0) { 286 AntiDepMode = (EnableAntiDepBreaking == "all") 287 ? TargetSubtargetInfo::ANTIDEP_ALL 288 : ((EnableAntiDepBreaking == "critical") 289 ? TargetSubtargetInfo::ANTIDEP_CRITICAL 290 : TargetSubtargetInfo::ANTIDEP_NONE); 291 } 292 SmallVector<const TargetRegisterClass *, 4> CriticalPathRCs; 293 Subtarget.getCriticalPathRCs(CriticalPathRCs); 294 RegClassInfo.runOnMachineFunction(MF); 295 296 LLVM_DEBUG(dbgs() << "PostRAScheduler\n"); 297 298 SchedulePostRATDList Scheduler(MF, *MLI, AA, RegClassInfo, AntiDepMode, 299 CriticalPathRCs); 300 301 // Loop over all of the basic blocks 302 for (auto &MBB : MF) { 303 #ifndef NDEBUG 304 // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod 305 if (DebugDiv > 0) { 306 static int bbcnt = 0; 307 if (bbcnt++ % DebugDiv != DebugMod) 308 continue; 309 dbgs() << "*** DEBUG scheduling " << MF.getName() << ":" 310 << printMBBReference(MBB) << " ***\n"; 311 } 312 #endif 313 314 // Initialize register live-range state for scheduling in this block. 315 Scheduler.startBlock(&MBB); 316 317 // Schedule each sequence of instructions not interrupted by a label 318 // or anything else that effectively needs to shut down scheduling. 319 MachineBasicBlock::iterator Current = MBB.end(); 320 unsigned Count = MBB.size(), CurrentCount = Count; 321 for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) { 322 MachineInstr &MI = *std::prev(I); 323 --Count; 324 // Calls are not scheduling boundaries before register allocation, but 325 // post-ra we don't gain anything by scheduling across calls since we 326 // don't need to worry about register pressure. 327 if (MI.isCall() || TII->isSchedulingBoundary(MI, &MBB, MF)) { 328 Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count); 329 Scheduler.setEndIndex(CurrentCount); 330 Scheduler.schedule(); 331 Scheduler.exitRegion(); 332 Scheduler.EmitSchedule(); 333 Current = &MI; 334 CurrentCount = Count; 335 Scheduler.Observe(MI, CurrentCount); 336 } 337 I = MI; 338 if (MI.isBundle()) 339 Count -= MI.getBundleSize(); 340 } 341 assert(Count == 0 && "Instruction count mismatch!"); 342 assert((MBB.begin() == Current || CurrentCount != 0) && 343 "Instruction count mismatch!"); 344 Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount); 345 Scheduler.setEndIndex(CurrentCount); 346 Scheduler.schedule(); 347 Scheduler.exitRegion(); 348 Scheduler.EmitSchedule(); 349 350 // Clean up register live-range state. 351 Scheduler.finishBlock(); 352 353 // Update register kills 354 Scheduler.fixupKills(MBB); 355 } 356 357 return true; 358 } 359 360 bool PostRASchedulerLegacy::runOnMachineFunction(MachineFunction &MF) { 361 if (skipFunction(MF.getFunction())) 362 return false; 363 364 MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 365 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 366 const TargetMachine *TM = 367 &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); 368 PostRAScheduler Impl(MF, MLI, AA, TM); 369 return Impl.run(MF); 370 } 371 372 PreservedAnalyses 373 PostRASchedulerPass::run(MachineFunction &MF, 374 MachineFunctionAnalysisManager &MFAM) { 375 MFPropsModifier _(*this, MF); 376 377 MachineLoopInfo *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF); 378 auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(MF) 379 .getManager(); 380 AliasAnalysis *AA = &FAM.getResult<AAManager>(MF.getFunction()); 381 PostRAScheduler Impl(MF, MLI, AA, TM); 382 bool Changed = Impl.run(MF); 383 if (!Changed) 384 return PreservedAnalyses::all(); 385 386 PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses(); 387 PA.preserveSet<CFGAnalyses>(); 388 PA.preserve<MachineDominatorTreeAnalysis>(); 389 PA.preserve<MachineLoopAnalysis>(); 390 return PA; 391 } 392 393 /// StartBlock - Initialize register live-range state for scheduling in 394 /// this block. 395 /// 396 void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) { 397 // Call the superclass. 398 ScheduleDAGInstrs::startBlock(BB); 399 400 // Reset the hazard recognizer and anti-dep breaker. 401 HazardRec->Reset(); 402 if (AntiDepBreak) 403 AntiDepBreak->StartBlock(BB); 404 } 405 406 /// Schedule - Schedule the instruction range using list scheduling. 407 /// 408 void SchedulePostRATDList::schedule() { 409 // Build the scheduling graph. 410 buildSchedGraph(AA); 411 412 if (AntiDepBreak) { 413 unsigned Broken = 414 AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd, 415 EndIndex, DbgValues); 416 417 if (Broken != 0) { 418 // We made changes. Update the dependency graph. 419 // Theoretically we could update the graph in place: 420 // When a live range is changed to use a different register, remove 421 // the def's anti-dependence *and* output-dependence edges due to 422 // that register, and add new anti-dependence and output-dependence 423 // edges based on the next live range of the register. 424 ScheduleDAG::clearDAG(); 425 buildSchedGraph(AA); 426 427 NumFixedAnti += Broken; 428 } 429 } 430 431 postProcessDAG(); 432 433 LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n"); 434 LLVM_DEBUG(dump()); 435 436 AvailableQueue.initNodes(SUnits); 437 ListScheduleTopDown(); 438 AvailableQueue.releaseState(); 439 } 440 441 /// Observe - Update liveness information to account for the current 442 /// instruction, which will not be scheduled. 443 /// 444 void SchedulePostRATDList::Observe(MachineInstr &MI, unsigned Count) { 445 if (AntiDepBreak) 446 AntiDepBreak->Observe(MI, Count, EndIndex); 447 } 448 449 /// FinishBlock - Clean up register live-range state. 450 /// 451 void SchedulePostRATDList::finishBlock() { 452 if (AntiDepBreak) 453 AntiDepBreak->FinishBlock(); 454 455 // Call the superclass. 456 ScheduleDAGInstrs::finishBlock(); 457 } 458 459 /// Apply each ScheduleDAGMutation step in order. 460 void SchedulePostRATDList::postProcessDAG() { 461 for (auto &M : Mutations) 462 M->apply(this); 463 } 464 465 //===----------------------------------------------------------------------===// 466 // Top-Down Scheduling 467 //===----------------------------------------------------------------------===// 468 469 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 470 /// the PendingQueue if the count reaches zero. 471 void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { 472 SUnit *SuccSU = SuccEdge->getSUnit(); 473 474 if (SuccEdge->isWeak()) { 475 --SuccSU->WeakPredsLeft; 476 return; 477 } 478 #ifndef NDEBUG 479 if (SuccSU->NumPredsLeft == 0) { 480 dbgs() << "*** Scheduling failed! ***\n"; 481 dumpNode(*SuccSU); 482 dbgs() << " has been released too many times!\n"; 483 llvm_unreachable(nullptr); 484 } 485 #endif 486 --SuccSU->NumPredsLeft; 487 488 // Standard scheduler algorithms will recompute the depth of the successor 489 // here as such: 490 // SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency()); 491 // 492 // However, we lazily compute node depth instead. Note that 493 // ScheduleNodeTopDown has already updated the depth of this node which causes 494 // all descendents to be marked dirty. Setting the successor depth explicitly 495 // here would cause depth to be recomputed for all its ancestors. If the 496 // successor is not yet ready (because of a transitively redundant edge) then 497 // this causes depth computation to be quadratic in the size of the DAG. 498 499 // If all the node's predecessors are scheduled, this node is ready 500 // to be scheduled. Ignore the special ExitSU node. 501 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) 502 PendingQueue.push_back(SuccSU); 503 } 504 505 /// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors. 506 void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { 507 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 508 I != E; ++I) { 509 ReleaseSucc(SU, &*I); 510 } 511 } 512 513 /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 514 /// count of its successors. If a successor pending count is zero, add it to 515 /// the Available queue. 516 void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 517 LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); 518 LLVM_DEBUG(dumpNode(*SU)); 519 520 Sequence.push_back(SU); 521 assert(CurCycle >= SU->getDepth() && 522 "Node scheduled above its depth!"); 523 SU->setDepthToAtLeast(CurCycle); 524 525 ReleaseSuccessors(SU); 526 SU->isScheduled = true; 527 AvailableQueue.scheduledNode(SU); 528 } 529 530 /// emitNoop - Add a noop to the current instruction sequence. 531 void SchedulePostRATDList::emitNoop(unsigned CurCycle) { 532 LLVM_DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); 533 HazardRec->EmitNoop(); 534 Sequence.push_back(nullptr); // NULL here means noop 535 ++NumNoops; 536 } 537 538 /// ListScheduleTopDown - The main loop of list scheduling for top-down 539 /// schedulers. 540 void SchedulePostRATDList::ListScheduleTopDown() { 541 unsigned CurCycle = 0; 542 543 // We're scheduling top-down but we're visiting the regions in 544 // bottom-up order, so we don't know the hazards at the start of a 545 // region. So assume no hazards (this should usually be ok as most 546 // blocks are a single region). 547 HazardRec->Reset(); 548 549 // Release any successors of the special Entry node. 550 ReleaseSuccessors(&EntrySU); 551 552 // Add all leaves to Available queue. 553 for (SUnit &SUnit : SUnits) { 554 // It is available if it has no predecessors. 555 if (!SUnit.NumPredsLeft && !SUnit.isAvailable) { 556 AvailableQueue.push(&SUnit); 557 SUnit.isAvailable = true; 558 } 559 } 560 561 // In any cycle where we can't schedule any instructions, we must 562 // stall or emit a noop, depending on the target. 563 bool CycleHasInsts = false; 564 565 // While Available queue is not empty, grab the node with the highest 566 // priority. If it is not ready put it back. Schedule the node. 567 std::vector<SUnit*> NotReady; 568 Sequence.reserve(SUnits.size()); 569 while (!AvailableQueue.empty() || !PendingQueue.empty()) { 570 // Check to see if any of the pending instructions are ready to issue. If 571 // so, add them to the available queue. 572 unsigned MinDepth = ~0u; 573 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { 574 if (PendingQueue[i]->getDepth() <= CurCycle) { 575 AvailableQueue.push(PendingQueue[i]); 576 PendingQueue[i]->isAvailable = true; 577 PendingQueue[i] = PendingQueue.back(); 578 PendingQueue.pop_back(); 579 --i; --e; 580 } else if (PendingQueue[i]->getDepth() < MinDepth) 581 MinDepth = PendingQueue[i]->getDepth(); 582 } 583 584 LLVM_DEBUG(dbgs() << "\n*** Examining Available\n"; 585 AvailableQueue.dump(this)); 586 587 SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr; 588 bool HasNoopHazards = false; 589 while (!AvailableQueue.empty()) { 590 SUnit *CurSUnit = AvailableQueue.pop(); 591 592 ScheduleHazardRecognizer::HazardType HT = 593 HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); 594 if (HT == ScheduleHazardRecognizer::NoHazard) { 595 if (HazardRec->ShouldPreferAnother(CurSUnit)) { 596 if (!NotPreferredSUnit) { 597 // If this is the first non-preferred node for this cycle, then 598 // record it and continue searching for a preferred node. If this 599 // is not the first non-preferred node, then treat it as though 600 // there had been a hazard. 601 NotPreferredSUnit = CurSUnit; 602 continue; 603 } 604 } else { 605 FoundSUnit = CurSUnit; 606 break; 607 } 608 } 609 610 // Remember if this is a noop hazard. 611 HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; 612 613 NotReady.push_back(CurSUnit); 614 } 615 616 // If we have a non-preferred node, push it back onto the available list. 617 // If we did not find a preferred node, then schedule this first 618 // non-preferred node. 619 if (NotPreferredSUnit) { 620 if (!FoundSUnit) { 621 LLVM_DEBUG( 622 dbgs() << "*** Will schedule a non-preferred instruction...\n"); 623 FoundSUnit = NotPreferredSUnit; 624 } else { 625 AvailableQueue.push(NotPreferredSUnit); 626 } 627 628 NotPreferredSUnit = nullptr; 629 } 630 631 // Add the nodes that aren't ready back onto the available list. 632 if (!NotReady.empty()) { 633 AvailableQueue.push_all(NotReady); 634 NotReady.clear(); 635 } 636 637 // If we found a node to schedule... 638 if (FoundSUnit) { 639 // If we need to emit noops prior to this instruction, then do so. 640 unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit); 641 for (unsigned i = 0; i != NumPreNoops; ++i) 642 emitNoop(CurCycle); 643 644 // ... schedule the node... 645 ScheduleNodeTopDown(FoundSUnit, CurCycle); 646 HazardRec->EmitInstruction(FoundSUnit); 647 CycleHasInsts = true; 648 if (HazardRec->atIssueLimit()) { 649 LLVM_DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle 650 << '\n'); 651 HazardRec->AdvanceCycle(); 652 ++CurCycle; 653 CycleHasInsts = false; 654 } 655 } else { 656 if (CycleHasInsts) { 657 LLVM_DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n'); 658 HazardRec->AdvanceCycle(); 659 } else if (!HasNoopHazards) { 660 // Otherwise, we have a pipeline stall, but no other problem, 661 // just advance the current cycle and try again. 662 LLVM_DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n'); 663 HazardRec->AdvanceCycle(); 664 ++NumStalls; 665 } else { 666 // Otherwise, we have no instructions to issue and we have instructions 667 // that will fault if we don't do this right. This is the case for 668 // processors without pipeline interlocks and other cases. 669 emitNoop(CurCycle); 670 } 671 672 ++CurCycle; 673 CycleHasInsts = false; 674 } 675 } 676 677 #ifndef NDEBUG 678 unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false); 679 unsigned Noops = llvm::count(Sequence, nullptr); 680 assert(Sequence.size() - Noops == ScheduledNodes && 681 "The number of nodes scheduled doesn't match the expected number!"); 682 #endif // NDEBUG 683 } 684 685 // EmitSchedule - Emit the machine code in scheduled order. 686 void SchedulePostRATDList::EmitSchedule() { 687 RegionBegin = RegionEnd; 688 689 // If first instruction was a DBG_VALUE then put it back. 690 if (FirstDbgValue) 691 BB->splice(RegionEnd, BB, FirstDbgValue); 692 693 // Then re-insert them according to the given schedule. 694 for (unsigned i = 0, e = Sequence.size(); i != e; i++) { 695 if (SUnit *SU = Sequence[i]) 696 BB->splice(RegionEnd, BB, SU->getInstr()); 697 else 698 // Null SUnit* is a noop. 699 TII->insertNoop(*BB, RegionEnd); 700 701 // Update the Begin iterator, as the first instruction in the block 702 // may have been scheduled later. 703 if (i == 0) 704 RegionBegin = std::prev(RegionEnd); 705 } 706 707 // Reinsert any remaining debug_values. 708 for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator 709 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { 710 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI); 711 MachineInstr *DbgValue = P.first; 712 MachineBasicBlock::iterator OrigPrivMI = P.second; 713 BB->splice(++OrigPrivMI, BB, DbgValue); 714 } 715 DbgValues.clear(); 716 FirstDbgValue = nullptr; 717 } 718