//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/VLIWMachineScheduler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iomanip>
#include <limits>
#include <memory>
#include <sstream>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
                                         cl::ZeroOrMore, cl::init(false));

static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
                                       cl::ZeroOrMore, cl::init(true));

static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
                                                cl::Hidden, cl::ZeroOrMore,
                                                cl::init(1));

// Check if the scheduler should penalize instructions that are available too
// early due to a zero-latency dependence.
static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
                                     cl::ZeroOrMore, cl::init(true));

// This value is used to determine if a register class is a high pressure set.
// We compute the maximum number of registers needed and divide it by the total
// available. Then, we compare the result to this value.
static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
                                  cl::init(0.75f),
                                  cl::desc("High register pressure threshold."));

VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
                                     const TargetSchedModel *SM)
    : TII(STI.getInstrInfo()), SchedModel(SM) {
  ResourcesModel = createPacketizer(STI);

  // This hard requirement could be relaxed,
  // but for now do not let it proceed.
  assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");

  Packet.reserve(SchedModel->getIssueWidth());
  Packet.clear();
  ResourcesModel->clearResources();
}

void VLIWResourceModel::reset() {
  Packet.clear();
  ResourcesModel->clearResources();
}

VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
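// For reference, the only DFAPacketizer operations this model relies on are
// the ones used below: canReserveResources() to query whether an instruction
// fits in the current packet, reserveResources() to commit it, and
// clearResources() to start an empty packet.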
/// Return true if there is a dependence between SUd and SUu.
bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
  if (SUd->Succs.size() == 0)
    return false;

  for (const auto &S : SUd->Succs) {
    // Since we do not add pseudos to packets, might as well
    // ignore order dependencies.
    if (S.isCtrl())
      continue;

    if (S.getSUnit() == SUu && S.getLatency() > 0)
      return true;
  }
  return false;
}

/// Check if scheduling of this SU is possible
/// in the current packet.
/// It is _not_ precise (it is stateful); think of it more as
/// another heuristic. Many corner cases were worked out
/// empirically.
bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
  if (!SU || !SU->getInstr())
    return false;

  // First see if the pipeline could receive this instruction
  // in the current cycle.
  switch (SU->getInstr()->getOpcode()) {
  default:
    if (!ResourcesModel->canReserveResources(*SU->getInstr()))
      return false;
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
    break;
  }

  // Now make sure there are no dependencies on instructions already
  // in the packet.
  if (IsTop) {
    for (unsigned i = 0, e = Packet.size(); i != e; ++i)
      if (hasDependence(Packet[i], SU))
        return false;
  } else {
    for (unsigned i = 0, e = Packet.size(); i != e; ++i)
      if (hasDependence(SU, Packet[i]))
        return false;
  }
  return true;
}

/// Keep track of available resources.
bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
  bool startNewCycle = false;
  // Artificially reset state.
  if (!SU) {
    reset();
    TotalPackets++;
    return false;
  }
  // If this SU does not fit in the packet or the packet is now full,
  // start a new one.
  if (!isResourceAvailable(SU, IsTop) ||
      Packet.size() >= SchedModel->getIssueWidth()) {
    reset();
    TotalPackets++;
    startNewCycle = true;
  }

  switch (SU->getInstr()->getOpcode()) {
  default:
    ResourcesModel->reserveResources(*SU->getInstr());
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::CFI_INSTRUCTION:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
    break;
  }
  Packet.push_back(SU);

#ifndef NDEBUG
  LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
    LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
    LLVM_DEBUG(Packet[i]->getInstr()->dump());
  }
#endif

  return startNewCycle;
}

DFAPacketizer *
VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
  return STI.getInstrInfo()->CreateTargetScheduleState(STI);
}
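// Illustrative sketch (mirrors calls made later in this file, not new
// behavior): a scheduling boundary drives this model one SUnit at a time,
//   if (ResourceModel->reserveResources(SU, isTop()))
//     bumpCycle();                       // SU did not fit; a new packet opened
// and forces an empty packet when nothing can issue:
//   ResourceModel->reserveResources(nullptr, isTop());
// See VLIWSchedBoundary::bumpNode() and pickOnlyChoice() below.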
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
                    << printMBBReference(*BB) << " " << BB->getName()
                    << " in_func " << BB->getParent()->getName()
                    << " at loop depth " << MLI->getLoopDepth(BB) << " \n");

  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  // Postprocess the DAG to add platform-specific artificial dependencies.
  postprocessDAG();

  SmallVector<SUnit *, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  LLVM_DEBUG({
    unsigned maxH = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getHeight() > maxH)
        maxH = SU.getHeight();
    dbgs() << "Max Height " << maxH << "\n";
  });
  LLVM_DEBUG({
    unsigned maxD = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getDepth() > maxD)
        maxD = SU.getDepth();
    dbgs() << "Max Depth " << maxD << "\n";
  });
  LLVM_DEBUG(dump());
  if (ViewMISchedDAGs)
    viewGraph();

  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(
        dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU)
      break;

    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = static_cast<VLIWMachineScheduler *>(dag);
  SchedModel = DAG->getSchedModel();

  Top.init(DAG, SchedModel);
  Bot.init(DAG, SchedModel);

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
  // or are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
  const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  delete Top.HazardRec;
  delete Bot.HazardRec;
  Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  delete Top.ResourceModel;
  delete Bot.ResourceModel;
  Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
  Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());

  const std::vector<unsigned> &MaxPressure =
      DAG->getRegPressure().MaxSetPressure;
  HighPressureSets.assign(MaxPressure.size(), false);
  for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
    unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
    HighPressureSets[i] =
        ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
  }

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
    const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
  return new VLIWResourceModel(STI, SchedModel);
}

void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
  for (const SDep &PI : SU->Preds) {
    unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
    unsigned MinLatency = PI.getLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }

  if (!SU->isScheduled)
    Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
       ++I) {
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }

  if (!SU->isScheduled)
    Bot.releaseNode(SU, SU->BotReadyCycle);
}

ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
  delete ResourceModel;
  delete HazardRec;
}
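// A small worked example for the ready-cycle updates above: if a predecessor
// of SU became top-ready at cycle 3 and the edge latency is 2, releaseTopNode()
// raises SU->TopReadyCycle to at least 5; releaseBottomNode() performs the
// symmetric computation over successors and BotReadyCycle.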
/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops
/// that can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if (IssueCount + uops > SchedModel->getIssueWidth())
    return true;

  return false;
}

void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
    SUnit *SU, unsigned ReadyCycle) {
  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);
}

/// Move the boundary of scheduled code by one cycle.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
  unsigned Width = SchedModel->getIssueWidth();
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
         "MinReadyCycle uninitialized");
  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  } else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;

  LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
                    << CurrCycle << '\n');
}

/// Move the boundary of scheduled code by one SUnit.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
  bool startNewCycle = false;

  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }

  // Update DFA model.
  startNewCycle = ResourceModel->reserveResources(SU, isTop());

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
  if (startNewCycle) {
    LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  } else
    LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
                      << CurrCycle << '\n');
}
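// Worked example of the IssueCount bookkeeping in bumpCycle()/bumpNode() above:
// with an issue width of 4, issuing 6 micro-ops in the current group leaves
// IssueCount at 6, and the next bumpCycle() carries 2 of them over (6 - 4)
// instead of resetting the count to 0.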
/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin() + i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin() + i);
    --i;
    --e;
  }
  CheckPending = false;
}

/// Remove SU from the ready set for this boundary.
void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple instructions
/// are ready, return NULL.
SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  auto AdvanceCycle = [this]() {
    if (Available.empty())
      return true;
    if (Available.size() == 1 && Pending.size() > 0)
      return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
             getWeakLeft(*Available.begin(), isTop()) != 0;
    return false;
  };
  for (unsigned i = 0; AdvanceCycle(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard");
    (void)i;
    ResourceModel->reserveResources(nullptr, isTop());
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return nullptr;
}

#ifndef NDEBUG
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
                                             const ReadyQueue &Q, SUnit *SU,
                                             int Cost, PressureChange P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
           << P.getUnitInc() << " ";
  else
    dbgs() << " ";
  dbgs() << "cost(" << Cost << ")\t";
  DAG->dumpNode(*SU);
}

// Very detailed queue dump, to be used with higher verbosity levels.
void ConvergingVLIWScheduler::readyQueueVerboseDump(
    const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
    ReadyQueue &Q) {
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  dbgs() << ">>> " << Q.getName() << "\n";
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);
    std::stringstream dbgstr;
    dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
    dbgs() << dbgstr.str();
    SchedulingCost(Q, *I, Candidate, RPDelta, true);
    dbgs() << "\t";
    (*I)->getInstr()->dump();
  }
  dbgs() << "\n";
}
#endif

/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
/// of SU, return true (we may have duplicates).
static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
  if (SU->NumPredsLeft == 0)
    return false;

  for (auto &Pred : SU->Preds) {
    // We found an available, but not scheduled, predecessor.
    if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
      return false;
  }

  return true;
}

/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
/// of SU, return true (we may have duplicates).
static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
  if (SU->NumSuccsLeft == 0)
    return false;

  for (auto &Succ : SU->Succs) {
    // We found an available, but not scheduled, successor.
    if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
      return false;
  }
  return true;
}

/// Check if the instruction changes the register pressure of a register in the
/// high pressure set. The function returns a negative value if the pressure
/// decreases and a positive value if the pressure increases. If the instruction
/// doesn't use a high pressure register or doesn't change the register
/// pressure, then return 0.
int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
  PressureDiff &PD = DAG->getPressureDiff(SU);
  for (auto &P : PD) {
    if (!P.isValid())
      continue;
    // The pressure differences are computed bottom-up, so the comparison for
    // an increase is positive in the bottom direction, but negative in the
    // top-down direction.
    if (HighPressureSets[P.getPSet()])
      return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
  }
  return 0;
}
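// Shape of the cost computed by SchedulingCost() below (PriorityOne/Two/Three
// and ScaleTwo are heuristic weights declared with the strategy class):
//   ResCount = 1
//            + PriorityOne                      // forced high priority
//            + (Height or Depth) * ScaleTwo     // if latency bound
//            + PriorityTwo + PriorityThree      // fits in the current packet
//            + NumNodesBlocking * ScaleTwo
//            - register-pressure penalties
//            + PriorityThree                    // zero-latency packet bonus
//            - PriorityOne                      // early-availability penalty
// Each term applies only under the conditions checked in the code.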
/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  LLVM_DEBUG(if (verbose) dbgs()
             << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
  // Forced priority is high.
  if (SU->isScheduleHigh) {
    ResCount += PriorityOne;
    LLVM_DEBUG(dbgs() << "H|");
  }

  unsigned IsAvailableAmt = 0;
  // Critical path first.
  if (Q.getID() == TopQID) {
    if (Top.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getHeight() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Top.ResourceModel->isResourceAvailable(SU, true)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  } else {
    if (Bot.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getDepth() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  }

  unsigned NumNodesBlocking = 0;
  if (Q.getID() == TopQID) {
    // How many SUs does it block from scheduling?
    // Look at all of the successors of this node.
    // Count the number of nodes that
    // this node is the sole unscheduled node for.
    if (Top.isLatencyBound(SU))
      for (const SDep &SI : SU->Succs)
        if (isSingleUnscheduledPred(SI.getSUnit(), SU))
          ++NumNodesBlocking;
  } else {
    // How many unscheduled predecessors block this node?
    if (Bot.isLatencyBound(SU))
      for (const SDep &PI : SU->Preds)
        if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
          ++NumNodesBlocking;
  }
  ResCount += (NumNodesBlocking * ScaleTwo);

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
    dbgs() << dbgstr.str();
  });

  // Factor in reg pressure as a heuristic.
  if (!IgnoreBBRegPressure) {
    // Decrease priority by the amount that register pressure exceeds the limit.
    ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
    // Decrease priority if register pressure exceeds the limit.
    ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
    // Decrease priority slightly if register pressure would increase over the
    // current maximum.
    ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
    // If there are register pressure issues, then we remove the value added for
    // the instruction being available. The rationale is that we really don't
    // want to schedule an instruction that causes a spill.
    if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
        (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
         Delta.CurrentMax.getUnitInc()))
      ResCount -= IsAvailableAmt;
    LLVM_DEBUG(if (verbose) {
      dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
             << Delta.CriticalMax.getUnitInc() << "/"
             << Delta.CurrentMax.getUnitInc() << ")|";
    });
  }

  // Give preference to a zero latency instruction if the dependent
  // instruction is in the current packet.
  if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
    for (const SDep &PI : SU->Preds) {
      if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
          PI.getLatency() == 0 &&
          Top.ResourceModel->isInPacket(PI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
    for (const SDep &SI : SU->Succs) {
      if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
          SI.getLatency() == 0 &&
          Bot.ResourceModel->isInPacket(SI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  }

  // If the instruction has a non-zero latency dependence with an instruction in
  // the current packet, then it should not be scheduled yet. The case occurs
  // when the dependent instruction is scheduled in a new packet, so the
  // scheduler updates the current cycle and pending instructions become
  // available.
  if (CheckEarlyAvail) {
    if (Q.getID() == TopQID) {
      for (const auto &PI : SU->Preds) {
        if (PI.getLatency() > 0 &&
            Top.ResourceModel->isInPacket(PI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    } else {
      for (const auto &SI : SU->Succs) {
        if (SI.getLatency() > 0 &&
            Bot.ResourceModel->isInPacket(SI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    }
  }

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "Total " << std::setw(4) << ResCount << ")";
    dbgs() << dbgstr.str();
  });

  return ResCount;
}
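// pickNodeFromQueue() below scans the available queue and keeps the best
// candidate found so far, preferring in order: the first node seen, plain
// node order when both costs are negative, a strictly higher SchedulingCost,
// fewer remaining weak (artificial) edges, more successors/predecessors when
// latency bound, and finally a NodeNum-based tie breaker (controlled by
// -use-newer-candidate) to keep the schedule deterministic.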
/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
ConvergingVLIWScheduler::CandResult
ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
                                           const RegPressureTracker &RPTracker,
                                           SchedCandidate &Candidate) {
  ReadyQueue &Q = Zone.Available;
  LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
                 readyQueueVerboseDump(RPTracker, Candidate, Q);
             else Q.dump(););

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  // BestSU remains NULL if no top candidates beat the best existing candidate.
  CandResult FoundCandidate = NoCand;
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);

    int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);

    // Initialize the candidate if needed.
    if (!Candidate.SU) {
      LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = NodeOrder;
      continue;
    }

    // Choose node order for negative cost candidates. There is no good
    // candidate in this case.
    if (CurrentCost < 0 && Candidate.SCost < 0) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
      }
      continue;
    }

    // Best cost.
    if (CurrentCost > Candidate.SCost) {
      LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = BestCost;
      continue;
    }

    // Choose an instruction that does not depend on an artificial edge.
    unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
    unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
    if (CurrWeak != CandWeak) {
      if (CurrWeak < CandWeak) {
        LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = Weak;
      }
      continue;
    }

    if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
      unsigned CurrSize, CandSize;
      if (Q.getID() == TopQID) {
        CurrSize = (*I)->Succs.size();
        CandSize = Candidate.SU->Succs.size();
      } else {
        CurrSize = (*I)->Preds.size();
        CandSize = Candidate.SU->Preds.size();
      }
      if (CurrSize > CandSize) {
        LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = BestCost;
      }
      // Keep the old candidate if it's a better candidate. That is, don't use
      // the subsequent tie breaker.
      if (CurrSize != CandSize)
        continue;
    }

    // Tie breaker.
    // To avoid scheduling indeterminism, we need a tie breaker
    // for the case when cost is identical for two nodes.
    if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
        continue;
      }
    }

    // Fall through to original instruction order.
    // Only consider node order if Candidate was chosen from this Q.
    if (FoundCandidate == NoCand)
      continue;
  }
  return FoundCandidate;
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    LLVM_DEBUG(dbgs() << "Picked only Top\n");
    IsTopNode = true;
    return SU;
  }
  SchedCandidate BotCand;
  // Prefer bottom scheduling when heuristics are silent.
  CandResult BotResult =
      pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotResult != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotResult == SingleExcess || BotResult == SingleCritical) {
    LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
    IsTopNode = false;
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  SchedCandidate TopCand;
  CandResult TopResult =
      pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopResult != NoCand && "failed to find the first candidate");

  if (TopResult == SingleExcess || TopResult == SingleCritical) {
    LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure pick it.
  if (BotResult == SingleMax) {
    LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
    IsTopNode = false;
    return BotCand.SU;
  }
  if (TopResult == SingleMax) {
    LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  if (TopCand.SCost > BotCand.SCost) {
    LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate in node order.
  LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
  IsTopNode = false;
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  if (ForceTopDown) {
    SU = Top.pickOnlyChoice();
    if (!SU) {
      SchedCandidate TopCand;
      CandResult TopResult =
          pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
      assert(TopResult != NoCand && "failed to find the first candidate");
      (void)TopResult;
      SU = TopCand.SU;
    }
    IsTopNode = true;
  } else if (ForceBottomUp) {
    SU = Bot.pickOnlyChoice();
    if (!SU) {
      SchedCandidate BotCand;
      CandResult BotResult =
          pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
      assert(BotResult != NoCand && "failed to find the first candidate");
      (void)BotResult;
      SU = BotCand.SU;
    }
    IsTopNode = false;
  } else {
    SU = pickNodeBidrectional(IsTopNode);
  }
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
                    << " Scheduling instruction in cycle "
                    << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
                    << reportPackets() << ")\n";
             DAG->dumpNode(*SU));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
/// to update its state based on the current cycle before MachineSchedStrategy
/// does.
void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    Top.bumpNode(SU);
    SU->TopReadyCycle = Top.CurrCycle;
  } else {
    Bot.bumpNode(SU);
    SU->BotReadyCycle = Bot.CurrCycle;
  }
}
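// Illustrative sketch (hypothetical target code, not part of this pass): a
// target typically enables this strategy from its TargetPassConfig, along the
// lines of
//   ScheduleDAGInstrs *
//   MyPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//     return new VLIWMachineScheduler(
//         C, std::make_unique<ConvergingVLIWScheduler>());
//   }
// usually with target-specific subclasses of VLIWMachineScheduler and
// ConvergingVLIWScheduler substituted for the base classes named here.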