1 //===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // MachineScheduler schedules machine instructions after phi elimination. It 10 // preserves LiveIntervals so it can be invoked before register allocation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/CodeGen/VLIWMachineScheduler.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/CodeGen/DFAPacketizer.h" 17 #include "llvm/CodeGen/MachineBasicBlock.h" 18 #include "llvm/CodeGen/MachineFunction.h" 19 #include "llvm/CodeGen/MachineInstr.h" 20 #include "llvm/CodeGen/MachineLoopInfo.h" 21 #include "llvm/CodeGen/RegisterClassInfo.h" 22 #include "llvm/CodeGen/RegisterPressure.h" 23 #include "llvm/CodeGen/ScheduleDAG.h" 24 #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 25 #include "llvm/CodeGen/TargetInstrInfo.h" 26 #include "llvm/CodeGen/TargetOpcodes.h" 27 #include "llvm/CodeGen/TargetRegisterInfo.h" 28 #include "llvm/CodeGen/TargetSchedule.h" 29 #include "llvm/CodeGen/TargetSubtargetInfo.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Support/raw_ostream.h" 33 #include <algorithm> 34 #include <cassert> 35 #include <iomanip> 36 #include <limits> 37 #include <memory> 38 #include <sstream> 39 40 using namespace llvm; 41 42 #define DEBUG_TYPE "machine-scheduler" 43 44 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden, 45 cl::init(false)); 46 47 static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden, 48 cl::init(true)); 49 50 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level", 51 cl::Hidden, cl::init(1)); 52 53 // Check if the scheduler should penalize instructions that are available to 54 // early due to a zero-latency dependence. 55 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden, 56 cl::init(true)); 57 58 // This value is used to determine if a register class is a high pressure set. 59 // We compute the maximum number of registers needed and divided by the total 60 // available. Then, we compare the result to this value. 61 static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden, 62 cl::init(0.75f), 63 cl::desc("High register pressure threhold.")); 64 65 VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI, 66 const TargetSchedModel *SM) 67 : TII(STI.getInstrInfo()), SchedModel(SM) { 68 ResourcesModel = createPacketizer(STI); 69 70 // This hard requirement could be relaxed, 71 // but for now do not let it proceed. 72 assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); 73 74 Packet.reserve(SchedModel->getIssueWidth()); 75 Packet.clear(); 76 ResourcesModel->clearResources(); 77 } 78 79 void VLIWResourceModel::reset() { 80 Packet.clear(); 81 ResourcesModel->clearResources(); 82 } 83 84 VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; } 85 86 /// Return true if there is a dependence between SUd and SUu. 87 bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) { 88 if (SUd->Succs.size() == 0) 89 return false; 90 91 for (const auto &S : SUd->Succs) { 92 // Since we do not add pseudos to packets, might as well 93 // ignore order dependencies. 94 if (S.isCtrl()) 95 continue; 96 97 if (S.getSUnit() == SUu && S.getLatency() > 0) 98 return true; 99 } 100 return false; 101 } 102 103 /// Check if scheduling of this SU is possible 104 /// in the current packet. 105 /// It is _not_ precise (statefull), it is more like 106 /// another heuristic. Many corner cases are figured 107 /// empirically. 108 bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) { 109 if (!SU || !SU->getInstr()) 110 return false; 111 112 // First see if the pipeline could receive this instruction 113 // in the current cycle. 114 switch (SU->getInstr()->getOpcode()) { 115 default: 116 if (!ResourcesModel->canReserveResources(*SU->getInstr())) 117 return false; 118 break; 119 case TargetOpcode::EXTRACT_SUBREG: 120 case TargetOpcode::INSERT_SUBREG: 121 case TargetOpcode::SUBREG_TO_REG: 122 case TargetOpcode::REG_SEQUENCE: 123 case TargetOpcode::IMPLICIT_DEF: 124 case TargetOpcode::COPY: 125 case TargetOpcode::INLINEASM: 126 case TargetOpcode::INLINEASM_BR: 127 break; 128 } 129 130 // Now see if there are no other dependencies to instructions already 131 // in the packet. 132 if (IsTop) { 133 for (unsigned i = 0, e = Packet.size(); i != e; ++i) 134 if (hasDependence(Packet[i], SU)) 135 return false; 136 } else { 137 for (unsigned i = 0, e = Packet.size(); i != e; ++i) 138 if (hasDependence(SU, Packet[i])) 139 return false; 140 } 141 return true; 142 } 143 144 /// Keep track of available resources. 145 bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) { 146 bool startNewCycle = false; 147 // Artificially reset state. 148 if (!SU) { 149 reset(); 150 TotalPackets++; 151 return false; 152 } 153 // If this SU does not fit in the packet or the packet is now full 154 // start a new one. 155 if (!isResourceAvailable(SU, IsTop) || 156 Packet.size() >= SchedModel->getIssueWidth()) { 157 reset(); 158 TotalPackets++; 159 startNewCycle = true; 160 } 161 162 switch (SU->getInstr()->getOpcode()) { 163 default: 164 ResourcesModel->reserveResources(*SU->getInstr()); 165 break; 166 case TargetOpcode::EXTRACT_SUBREG: 167 case TargetOpcode::INSERT_SUBREG: 168 case TargetOpcode::SUBREG_TO_REG: 169 case TargetOpcode::REG_SEQUENCE: 170 case TargetOpcode::IMPLICIT_DEF: 171 case TargetOpcode::KILL: 172 case TargetOpcode::CFI_INSTRUCTION: 173 case TargetOpcode::EH_LABEL: 174 case TargetOpcode::COPY: 175 case TargetOpcode::INLINEASM: 176 case TargetOpcode::INLINEASM_BR: 177 break; 178 } 179 Packet.push_back(SU); 180 181 #ifndef NDEBUG 182 LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); 183 for (unsigned i = 0, e = Packet.size(); i != e; ++i) { 184 LLVM_DEBUG(dbgs() << "\t[" << i << "] SU("); 185 LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); 186 LLVM_DEBUG(Packet[i]->getInstr()->dump()); 187 } 188 #endif 189 190 return startNewCycle; 191 } 192 193 DFAPacketizer * 194 VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const { 195 return STI.getInstrInfo()->CreateTargetScheduleState(STI); 196 } 197 198 /// schedule - Called back from MachineScheduler::runOnMachineFunction 199 /// after setting up the current scheduling region. [RegionBegin, RegionEnd) 200 /// only includes instructions that have DAG nodes, not scheduling boundaries. 201 void VLIWMachineScheduler::schedule() { 202 LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW " 203 << printMBBReference(*BB) << " " << BB->getName() 204 << " in_func " << BB->getParent()->getName() 205 << " at loop depth " << MLI->getLoopDepth(BB) << " \n"); 206 207 buildDAGWithRegPressure(); 208 209 Topo.InitDAGTopologicalSorting(); 210 211 // Postprocess the DAG to add platform-specific artificial dependencies. 212 postprocessDAG(); 213 214 SmallVector<SUnit *, 8> TopRoots, BotRoots; 215 findRootsAndBiasEdges(TopRoots, BotRoots); 216 217 // Initialize the strategy before modifying the DAG. 218 SchedImpl->initialize(this); 219 220 LLVM_DEBUG({ 221 unsigned maxH = 0; 222 for (const SUnit &SU : SUnits) 223 if (SU.getHeight() > maxH) 224 maxH = SU.getHeight(); 225 dbgs() << "Max Height " << maxH << "\n"; 226 }); 227 LLVM_DEBUG({ 228 unsigned maxD = 0; 229 for (const SUnit &SU : SUnits) 230 if (SU.getDepth() > maxD) 231 maxD = SU.getDepth(); 232 dbgs() << "Max Depth " << maxD << "\n"; 233 }); 234 LLVM_DEBUG(dump()); 235 if (ViewMISchedDAGs) 236 viewGraph(); 237 238 initQueues(TopRoots, BotRoots); 239 240 bool IsTopNode = false; 241 while (true) { 242 LLVM_DEBUG( 243 dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); 244 SUnit *SU = SchedImpl->pickNode(IsTopNode); 245 if (!SU) 246 break; 247 248 if (!checkSchedLimit()) 249 break; 250 251 scheduleMI(SU, IsTopNode); 252 253 // Notify the scheduling strategy after updating the DAG. 254 SchedImpl->schedNode(SU, IsTopNode); 255 256 updateQueues(SU, IsTopNode); 257 } 258 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); 259 260 placeDebugValues(); 261 262 LLVM_DEBUG({ 263 dbgs() << "*** Final schedule for " 264 << printMBBReference(*begin()->getParent()) << " ***\n"; 265 dumpSchedule(); 266 dbgs() << '\n'; 267 }); 268 } 269 270 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { 271 DAG = static_cast<VLIWMachineScheduler *>(dag); 272 SchedModel = DAG->getSchedModel(); 273 274 Top.init(DAG, SchedModel); 275 Bot.init(DAG, SchedModel); 276 277 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 278 // are disabled, then these HazardRecs will be disabled. 279 const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); 280 const TargetSubtargetInfo &STI = DAG->MF.getSubtarget(); 281 const TargetInstrInfo *TII = STI.getInstrInfo(); 282 delete Top.HazardRec; 283 delete Bot.HazardRec; 284 Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); 285 Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); 286 287 delete Top.ResourceModel; 288 delete Bot.ResourceModel; 289 Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel()); 290 Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel()); 291 292 const std::vector<unsigned> &MaxPressure = 293 DAG->getRegPressure().MaxSetPressure; 294 HighPressureSets.assign(MaxPressure.size(), false); 295 for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) { 296 unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i); 297 HighPressureSets[i] = 298 ((float)MaxPressure[i] > ((float)Limit * RPThreshold)); 299 } 300 301 assert((!ForceTopDown || !ForceBottomUp) && 302 "-misched-topdown incompatible with -misched-bottomup"); 303 } 304 305 VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel( 306 const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const { 307 return new VLIWResourceModel(STI, SchedModel); 308 } 309 310 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { 311 for (const SDep &PI : SU->Preds) { 312 unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle; 313 unsigned MinLatency = PI.getLatency(); 314 #ifndef NDEBUG 315 Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); 316 #endif 317 if (SU->TopReadyCycle < PredReadyCycle + MinLatency) 318 SU->TopReadyCycle = PredReadyCycle + MinLatency; 319 } 320 321 if (!SU->isScheduled) 322 Top.releaseNode(SU, SU->TopReadyCycle); 323 } 324 325 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { 326 assert(SU->getInstr() && "Scheduled SUnit must have instr"); 327 328 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; 329 ++I) { 330 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; 331 unsigned MinLatency = I->getLatency(); 332 #ifndef NDEBUG 333 Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); 334 #endif 335 if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) 336 SU->BotReadyCycle = SuccReadyCycle + MinLatency; 337 } 338 339 if (!SU->isScheduled) 340 Bot.releaseNode(SU, SU->BotReadyCycle); 341 } 342 343 ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() { 344 delete ResourceModel; 345 delete HazardRec; 346 } 347 348 /// Does this SU have a hazard within the current instruction group. 349 /// 350 /// The scheduler supports two modes of hazard recognition. The first is the 351 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that 352 /// supports highly complicated in-order reservation tables 353 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. 354 /// 355 /// The second is a streamlined mechanism that checks for hazards based on 356 /// simple counters that the scheduler itself maintains. It explicitly checks 357 /// for instruction dispatch limitations, including the number of micro-ops that 358 /// can dispatch per cycle. 359 /// 360 /// TODO: Also check whether the SU must start a new group. 361 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { 362 if (HazardRec->isEnabled()) 363 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; 364 365 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); 366 if (IssueCount + uops > SchedModel->getIssueWidth()) 367 return true; 368 369 return false; 370 } 371 372 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode( 373 SUnit *SU, unsigned ReadyCycle) { 374 if (ReadyCycle < MinReadyCycle) 375 MinReadyCycle = ReadyCycle; 376 377 // Check for interlocks first. For the purpose of other heuristics, an 378 // instruction that cannot issue appears as if it's not in the ReadyQueue. 379 if (ReadyCycle > CurrCycle || checkHazard(SU)) 380 381 Pending.push(SU); 382 else 383 Available.push(SU); 384 } 385 386 /// Move the boundary of scheduled code by one cycle. 387 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { 388 unsigned Width = SchedModel->getIssueWidth(); 389 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; 390 391 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && 392 "MinReadyCycle uninitialized"); 393 unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); 394 395 if (!HazardRec->isEnabled()) { 396 // Bypass HazardRec virtual calls. 397 CurrCycle = NextCycle; 398 } else { 399 // Bypass getHazardType calls in case of long latency. 400 for (; CurrCycle != NextCycle; ++CurrCycle) { 401 if (isTop()) 402 HazardRec->AdvanceCycle(); 403 else 404 HazardRec->RecedeCycle(); 405 } 406 } 407 CheckPending = true; 408 409 LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " 410 << CurrCycle << '\n'); 411 } 412 413 /// Move the boundary of scheduled code by one SUnit. 414 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { 415 bool startNewCycle = false; 416 417 // Update the reservation table. 418 if (HazardRec->isEnabled()) { 419 if (!isTop() && SU->isCall) { 420 // Calls are scheduled with their preceding instructions. For bottom-up 421 // scheduling, clear the pipeline state before emitting. 422 HazardRec->Reset(); 423 } 424 HazardRec->EmitInstruction(SU); 425 } 426 427 // Update DFA model. 428 startNewCycle = ResourceModel->reserveResources(SU, isTop()); 429 430 // Check the instruction group dispatch limit. 431 // TODO: Check if this SU must end a dispatch group. 432 IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); 433 if (startNewCycle) { 434 LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); 435 bumpCycle(); 436 } else 437 LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle " 438 << CurrCycle << '\n'); 439 } 440 441 /// Release pending ready nodes in to the available queue. This makes them 442 /// visible to heuristics. 443 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() { 444 // If the available queue is empty, it is safe to reset MinReadyCycle. 445 if (Available.empty()) 446 MinReadyCycle = std::numeric_limits<unsigned>::max(); 447 448 // Check to see if any of the pending instructions are ready to issue. If 449 // so, add them to the available queue. 450 for (unsigned i = 0, e = Pending.size(); i != e; ++i) { 451 SUnit *SU = *(Pending.begin() + i); 452 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; 453 454 if (ReadyCycle < MinReadyCycle) 455 MinReadyCycle = ReadyCycle; 456 457 if (ReadyCycle > CurrCycle) 458 continue; 459 460 if (checkHazard(SU)) 461 continue; 462 463 Available.push(SU); 464 Pending.remove(Pending.begin() + i); 465 --i; 466 --e; 467 } 468 CheckPending = false; 469 } 470 471 /// Remove SU from the ready set for this boundary. 472 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { 473 if (Available.isInQueue(SU)) 474 Available.remove(Available.find(SU)); 475 else { 476 assert(Pending.isInQueue(SU) && "bad ready count"); 477 Pending.remove(Pending.find(SU)); 478 } 479 } 480 481 /// If this queue only has one ready candidate, return it. As a side effect, 482 /// advance the cycle until at least one node is ready. If multiple instructions 483 /// are ready, return NULL. 484 SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { 485 if (CheckPending) 486 releasePending(); 487 488 auto AdvanceCycle = [this]() { 489 if (Available.empty()) 490 return true; 491 if (Available.size() == 1 && Pending.size() > 0) 492 return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) || 493 getWeakLeft(*Available.begin(), isTop()) != 0; 494 return false; 495 }; 496 for (unsigned i = 0; AdvanceCycle(); ++i) { 497 assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && 498 "permanent hazard"); 499 (void)i; 500 ResourceModel->reserveResources(nullptr, isTop()); 501 bumpCycle(); 502 releasePending(); 503 } 504 if (Available.size() == 1) 505 return *Available.begin(); 506 return nullptr; 507 } 508 509 #ifndef NDEBUG 510 void ConvergingVLIWScheduler::traceCandidate(const char *Label, 511 const ReadyQueue &Q, SUnit *SU, 512 int Cost, PressureChange P) { 513 dbgs() << Label << " " << Q.getName() << " "; 514 if (P.isValid()) 515 dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" 516 << P.getUnitInc() << " "; 517 else 518 dbgs() << " "; 519 dbgs() << "cost(" << Cost << ")\t"; 520 DAG->dumpNode(*SU); 521 } 522 523 // Very detailed queue dump, to be used with higher verbosity levels. 524 void ConvergingVLIWScheduler::readyQueueVerboseDump( 525 const RegPressureTracker &RPTracker, SchedCandidate &Candidate, 526 ReadyQueue &Q) { 527 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); 528 529 dbgs() << ">>> " << Q.getName() << "\n"; 530 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 531 RegPressureDelta RPDelta; 532 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, 533 DAG->getRegionCriticalPSets(), 534 DAG->getRegPressure().MaxSetPressure); 535 std::stringstream dbgstr; 536 dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")"; 537 dbgs() << dbgstr.str(); 538 SchedulingCost(Q, *I, Candidate, RPDelta, true); 539 dbgs() << "\t"; 540 (*I)->getInstr()->dump(); 541 } 542 dbgs() << "\n"; 543 } 544 #endif 545 546 /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor 547 /// of SU, return true (we may have duplicates) 548 static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { 549 if (SU->NumPredsLeft == 0) 550 return false; 551 552 for (auto &Pred : SU->Preds) { 553 // We found an available, but not scheduled, predecessor. 554 if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) 555 return false; 556 } 557 558 return true; 559 } 560 561 /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor 562 /// of SU, return true (we may have duplicates) 563 static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { 564 if (SU->NumSuccsLeft == 0) 565 return false; 566 567 for (auto &Succ : SU->Succs) { 568 // We found an available, but not scheduled, successor. 569 if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) 570 return false; 571 } 572 return true; 573 } 574 575 /// Check if the instruction changes the register pressure of a register in the 576 /// high pressure set. The function returns a negative value if the pressure 577 /// decreases and a positive value is the pressure increases. If the instruction 578 /// doesn't use a high pressure register or doesn't change the register 579 /// pressure, then return 0. 580 int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) { 581 PressureDiff &PD = DAG->getPressureDiff(SU); 582 for (const auto &P : PD) { 583 if (!P.isValid()) 584 continue; 585 // The pressure differences are computed bottom-up, so the comparision for 586 // an increase is positive in the bottom direction, but negative in the 587 // top-down direction. 588 if (HighPressureSets[P.getPSet()]) 589 return (isBotUp ? P.getUnitInc() : -P.getUnitInc()); 590 } 591 return 0; 592 } 593 594 /// Single point to compute overall scheduling cost. 595 /// TODO: More heuristics will be used soon. 596 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, 597 SchedCandidate &Candidate, 598 RegPressureDelta &Delta, 599 bool verbose) { 600 // Initial trivial priority. 601 int ResCount = 1; 602 603 // Do not waste time on a node that is already scheduled. 604 if (!SU || SU->isScheduled) 605 return ResCount; 606 607 LLVM_DEBUG(if (verbose) dbgs() 608 << ((Q.getID() == TopQID) ? "(top|" : "(bot|")); 609 // Forced priority is high. 610 if (SU->isScheduleHigh) { 611 ResCount += PriorityOne; 612 LLVM_DEBUG(dbgs() << "H|"); 613 } 614 615 unsigned IsAvailableAmt = 0; 616 // Critical path first. 617 if (Q.getID() == TopQID) { 618 if (Top.isLatencyBound(SU)) { 619 LLVM_DEBUG(if (verbose) dbgs() << "LB|"); 620 ResCount += (SU->getHeight() * ScaleTwo); 621 } 622 623 LLVM_DEBUG(if (verbose) { 624 std::stringstream dbgstr; 625 dbgstr << "h" << std::setw(3) << SU->getHeight() << "|"; 626 dbgs() << dbgstr.str(); 627 }); 628 629 // If resources are available for it, multiply the 630 // chance of scheduling. 631 if (Top.ResourceModel->isResourceAvailable(SU, true)) { 632 IsAvailableAmt = (PriorityTwo + PriorityThree); 633 ResCount += IsAvailableAmt; 634 LLVM_DEBUG(if (verbose) dbgs() << "A|"); 635 } else 636 LLVM_DEBUG(if (verbose) dbgs() << " |"); 637 } else { 638 if (Bot.isLatencyBound(SU)) { 639 LLVM_DEBUG(if (verbose) dbgs() << "LB|"); 640 ResCount += (SU->getDepth() * ScaleTwo); 641 } 642 643 LLVM_DEBUG(if (verbose) { 644 std::stringstream dbgstr; 645 dbgstr << "d" << std::setw(3) << SU->getDepth() << "|"; 646 dbgs() << dbgstr.str(); 647 }); 648 649 // If resources are available for it, multiply the 650 // chance of scheduling. 651 if (Bot.ResourceModel->isResourceAvailable(SU, false)) { 652 IsAvailableAmt = (PriorityTwo + PriorityThree); 653 ResCount += IsAvailableAmt; 654 LLVM_DEBUG(if (verbose) dbgs() << "A|"); 655 } else 656 LLVM_DEBUG(if (verbose) dbgs() << " |"); 657 } 658 659 unsigned NumNodesBlocking = 0; 660 if (Q.getID() == TopQID) { 661 // How many SUs does it block from scheduling? 662 // Look at all of the successors of this node. 663 // Count the number of nodes that 664 // this node is the sole unscheduled node for. 665 if (Top.isLatencyBound(SU)) 666 for (const SDep &SI : SU->Succs) 667 if (isSingleUnscheduledPred(SI.getSUnit(), SU)) 668 ++NumNodesBlocking; 669 } else { 670 // How many unscheduled predecessors block this node? 671 if (Bot.isLatencyBound(SU)) 672 for (const SDep &PI : SU->Preds) 673 if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) 674 ++NumNodesBlocking; 675 } 676 ResCount += (NumNodesBlocking * ScaleTwo); 677 678 LLVM_DEBUG(if (verbose) { 679 std::stringstream dbgstr; 680 dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|"; 681 dbgs() << dbgstr.str(); 682 }); 683 684 // Factor in reg pressure as a heuristic. 685 if (!IgnoreBBRegPressure) { 686 // Decrease priority by the amount that register pressure exceeds the limit. 687 ResCount -= (Delta.Excess.getUnitInc() * PriorityOne); 688 // Decrease priority if register pressure exceeds the limit. 689 ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne); 690 // Decrease priority slightly if register pressure would increase over the 691 // current maximum. 692 ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo); 693 // If there are register pressure issues, then we remove the value added for 694 // the instruction being available. The rationale is that we really don't 695 // want to schedule an instruction that causes a spill. 696 if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 && 697 (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() || 698 Delta.CurrentMax.getUnitInc())) 699 ResCount -= IsAvailableAmt; 700 LLVM_DEBUG(if (verbose) { 701 dbgs() << "RP " << Delta.Excess.getUnitInc() << "/" 702 << Delta.CriticalMax.getUnitInc() << "/" 703 << Delta.CurrentMax.getUnitInc() << ")|"; 704 }); 705 } 706 707 // Give preference to a zero latency instruction if the dependent 708 // instruction is in the current packet. 709 if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) { 710 for (const SDep &PI : SU->Preds) { 711 if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() && 712 PI.getLatency() == 0 && 713 Top.ResourceModel->isInPacket(PI.getSUnit())) { 714 ResCount += PriorityThree; 715 LLVM_DEBUG(if (verbose) dbgs() << "Z|"); 716 } 717 } 718 } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) { 719 for (const SDep &SI : SU->Succs) { 720 if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() && 721 SI.getLatency() == 0 && 722 Bot.ResourceModel->isInPacket(SI.getSUnit())) { 723 ResCount += PriorityThree; 724 LLVM_DEBUG(if (verbose) dbgs() << "Z|"); 725 } 726 } 727 } 728 729 // If the instruction has a non-zero latency dependence with an instruction in 730 // the current packet, then it should not be scheduled yet. The case occurs 731 // when the dependent instruction is scheduled in a new packet, so the 732 // scheduler updates the current cycle and pending instructions become 733 // available. 734 if (CheckEarlyAvail) { 735 if (Q.getID() == TopQID) { 736 for (const auto &PI : SU->Preds) { 737 if (PI.getLatency() > 0 && 738 Top.ResourceModel->isInPacket(PI.getSUnit())) { 739 ResCount -= PriorityOne; 740 LLVM_DEBUG(if (verbose) dbgs() << "D|"); 741 } 742 } 743 } else { 744 for (const auto &SI : SU->Succs) { 745 if (SI.getLatency() > 0 && 746 Bot.ResourceModel->isInPacket(SI.getSUnit())) { 747 ResCount -= PriorityOne; 748 LLVM_DEBUG(if (verbose) dbgs() << "D|"); 749 } 750 } 751 } 752 } 753 754 LLVM_DEBUG(if (verbose) { 755 std::stringstream dbgstr; 756 dbgstr << "Total " << std::setw(4) << ResCount << ")"; 757 dbgs() << dbgstr.str(); 758 }); 759 760 return ResCount; 761 } 762 763 /// Pick the best candidate from the top queue. 764 /// 765 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during 766 /// DAG building. To adjust for the current scheduling location we need to 767 /// maintain the number of vreg uses remaining to be top-scheduled. 768 ConvergingVLIWScheduler::CandResult 769 ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone, 770 const RegPressureTracker &RPTracker, 771 SchedCandidate &Candidate) { 772 ReadyQueue &Q = Zone.Available; 773 LLVM_DEBUG(if (SchedDebugVerboseLevel > 1) 774 readyQueueVerboseDump(RPTracker, Candidate, Q); 775 else Q.dump();); 776 777 // getMaxPressureDelta temporarily modifies the tracker. 778 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); 779 780 // BestSU remains NULL if no top candidates beat the best existing candidate. 781 CandResult FoundCandidate = NoCand; 782 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 783 RegPressureDelta RPDelta; 784 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, 785 DAG->getRegionCriticalPSets(), 786 DAG->getRegPressure().MaxSetPressure); 787 788 int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); 789 790 // Initialize the candidate if needed. 791 if (!Candidate.SU) { 792 LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost)); 793 Candidate.SU = *I; 794 Candidate.RPDelta = RPDelta; 795 Candidate.SCost = CurrentCost; 796 FoundCandidate = NodeOrder; 797 continue; 798 } 799 800 // Choose node order for negative cost candidates. There is no good 801 // candidate in this case. 802 if (CurrentCost < 0 && Candidate.SCost < 0) { 803 if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || 804 (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { 805 LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost)); 806 Candidate.SU = *I; 807 Candidate.RPDelta = RPDelta; 808 Candidate.SCost = CurrentCost; 809 FoundCandidate = NodeOrder; 810 } 811 continue; 812 } 813 814 // Best cost. 815 if (CurrentCost > Candidate.SCost) { 816 LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost)); 817 Candidate.SU = *I; 818 Candidate.RPDelta = RPDelta; 819 Candidate.SCost = CurrentCost; 820 FoundCandidate = BestCost; 821 continue; 822 } 823 824 // Choose an instruction that does not depend on an artificial edge. 825 unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID)); 826 unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID)); 827 if (CurrWeak != CandWeak) { 828 if (CurrWeak < CandWeak) { 829 LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost)); 830 Candidate.SU = *I; 831 Candidate.RPDelta = RPDelta; 832 Candidate.SCost = CurrentCost; 833 FoundCandidate = Weak; 834 } 835 continue; 836 } 837 838 if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) { 839 unsigned CurrSize, CandSize; 840 if (Q.getID() == TopQID) { 841 CurrSize = (*I)->Succs.size(); 842 CandSize = Candidate.SU->Succs.size(); 843 } else { 844 CurrSize = (*I)->Preds.size(); 845 CandSize = Candidate.SU->Preds.size(); 846 } 847 if (CurrSize > CandSize) { 848 LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost)); 849 Candidate.SU = *I; 850 Candidate.RPDelta = RPDelta; 851 Candidate.SCost = CurrentCost; 852 FoundCandidate = BestCost; 853 } 854 // Keep the old candidate if it's a better candidate. That is, don't use 855 // the subsequent tie breaker. 856 if (CurrSize != CandSize) 857 continue; 858 } 859 860 // Tie breaker. 861 // To avoid scheduling indeterminism, we need a tie breaker 862 // for the case when cost is identical for two nodes. 863 if (UseNewerCandidate && CurrentCost == Candidate.SCost) { 864 if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || 865 (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { 866 LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost)); 867 Candidate.SU = *I; 868 Candidate.RPDelta = RPDelta; 869 Candidate.SCost = CurrentCost; 870 FoundCandidate = NodeOrder; 871 continue; 872 } 873 } 874 875 // Fall through to original instruction order. 876 // Only consider node order if Candidate was chosen from this Q. 877 if (FoundCandidate == NoCand) 878 continue; 879 } 880 return FoundCandidate; 881 } 882 883 /// Pick the best candidate node from either the top or bottom queue. 884 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { 885 // Schedule as far as possible in the direction of no choice. This is most 886 // efficient, but also provides the best heuristics for CriticalPSets. 887 if (SUnit *SU = Bot.pickOnlyChoice()) { 888 LLVM_DEBUG(dbgs() << "Picked only Bottom\n"); 889 IsTopNode = false; 890 return SU; 891 } 892 if (SUnit *SU = Top.pickOnlyChoice()) { 893 LLVM_DEBUG(dbgs() << "Picked only Top\n"); 894 IsTopNode = true; 895 return SU; 896 } 897 SchedCandidate BotCand; 898 // Prefer bottom scheduling when heuristics are silent. 899 CandResult BotResult = 900 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); 901 assert(BotResult != NoCand && "failed to find the first candidate"); 902 903 // If either Q has a single candidate that provides the least increase in 904 // Excess pressure, we can immediately schedule from that Q. 905 // 906 // RegionCriticalPSets summarizes the pressure within the scheduled region and 907 // affects picking from either Q. If scheduling in one direction must 908 // increase pressure for one of the excess PSets, then schedule in that 909 // direction first to provide more freedom in the other direction. 910 if (BotResult == SingleExcess || BotResult == SingleCritical) { 911 LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n"); 912 IsTopNode = false; 913 return BotCand.SU; 914 } 915 // Check if the top Q has a better candidate. 916 SchedCandidate TopCand; 917 CandResult TopResult = 918 pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); 919 assert(TopResult != NoCand && "failed to find the first candidate"); 920 921 if (TopResult == SingleExcess || TopResult == SingleCritical) { 922 LLVM_DEBUG(dbgs() << "Prefered Top Node\n"); 923 IsTopNode = true; 924 return TopCand.SU; 925 } 926 // If either Q has a single candidate that minimizes pressure above the 927 // original region's pressure pick it. 928 if (BotResult == SingleMax) { 929 LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n"); 930 IsTopNode = false; 931 return BotCand.SU; 932 } 933 if (TopResult == SingleMax) { 934 LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n"); 935 IsTopNode = true; 936 return TopCand.SU; 937 } 938 if (TopCand.SCost > BotCand.SCost) { 939 LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n"); 940 IsTopNode = true; 941 return TopCand.SU; 942 } 943 // Otherwise prefer the bottom candidate in node order. 944 LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n"); 945 IsTopNode = false; 946 return BotCand.SU; 947 } 948 949 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. 950 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { 951 if (DAG->top() == DAG->bottom()) { 952 assert(Top.Available.empty() && Top.Pending.empty() && 953 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); 954 return nullptr; 955 } 956 SUnit *SU; 957 if (ForceTopDown) { 958 SU = Top.pickOnlyChoice(); 959 if (!SU) { 960 SchedCandidate TopCand; 961 CandResult TopResult = 962 pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); 963 assert(TopResult != NoCand && "failed to find the first candidate"); 964 (void)TopResult; 965 SU = TopCand.SU; 966 } 967 IsTopNode = true; 968 } else if (ForceBottomUp) { 969 SU = Bot.pickOnlyChoice(); 970 if (!SU) { 971 SchedCandidate BotCand; 972 CandResult BotResult = 973 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); 974 assert(BotResult != NoCand && "failed to find the first candidate"); 975 (void)BotResult; 976 SU = BotCand.SU; 977 } 978 IsTopNode = false; 979 } else { 980 SU = pickNodeBidrectional(IsTopNode); 981 } 982 if (SU->isTopReady()) 983 Top.removeReady(SU); 984 if (SU->isBottomReady()) 985 Bot.removeReady(SU); 986 987 LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") 988 << " Scheduling instruction in cycle " 989 << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " (" 990 << reportPackets() << ")\n"; 991 DAG->dumpNode(*SU)); 992 return SU; 993 } 994 995 /// Update the scheduler's state after scheduling a node. This is the same node 996 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs 997 /// to update it's state based on the current cycle before MachineSchedStrategy 998 /// does. 999 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { 1000 if (IsTopNode) { 1001 Top.bumpNode(SU); 1002 SU->TopReadyCycle = Top.CurrCycle; 1003 } else { 1004 Bot.bumpNode(SU); 1005 SU->BotReadyCycle = Bot.CurrCycle; 1006 } 1007 } 1008