1 //===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // MachineScheduler schedules machine instructions after phi elimination. It 10 // preserves LiveIntervals so it can be invoked before register allocation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "HexagonMachineScheduler.h" 15 #include "HexagonInstrInfo.h" 16 #include "HexagonSubtarget.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/CodeGen/DFAPacketizer.h" 19 #include "llvm/CodeGen/MachineBasicBlock.h" 20 #include "llvm/CodeGen/MachineFunction.h" 21 #include "llvm/CodeGen/MachineInstr.h" 22 #include "llvm/CodeGen/MachineLoopInfo.h" 23 #include "llvm/CodeGen/RegisterClassInfo.h" 24 #include "llvm/CodeGen/RegisterPressure.h" 25 #include "llvm/CodeGen/ScheduleDAG.h" 26 #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 27 #include "llvm/CodeGen/TargetInstrInfo.h" 28 #include "llvm/CodeGen/TargetOpcodes.h" 29 #include "llvm/CodeGen/TargetRegisterInfo.h" 30 #include "llvm/CodeGen/TargetSchedule.h" 31 #include "llvm/CodeGen/TargetSubtargetInfo.h" 32 #include "llvm/IR/Function.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include <algorithm> 37 #include <cassert> 38 #include <iomanip> 39 #include <limits> 40 #include <memory> 41 #include <sstream> 42 43 using namespace llvm; 44 45 #define DEBUG_TYPE "machine-scheduler" 46 47 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", 48 cl::Hidden, cl::ZeroOrMore, cl::init(false)); 49 50 static cl::opt<bool> UseNewerCandidate("use-newer-candidate", 51 cl::Hidden, cl::ZeroOrMore, cl::init(true)); 52 53 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level", 54 cl::Hidden, cl::ZeroOrMore, cl::init(1)); 55 56 // Check if the scheduler should penalize instructions that are available to 57 // early due to a zero-latency dependence. 58 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden, 59 cl::ZeroOrMore, cl::init(true)); 60 61 // This value is used to determine if a register class is a high pressure set. 62 // We compute the maximum number of registers needed and divided by the total 63 // available. Then, we compare the result to this value. 64 static cl::opt<float> RPThreshold("hexagon-reg-pressure", cl::Hidden, 65 cl::init(0.75f), cl::desc("High register pressure threhold.")); 66 67 /// Return true if there is a dependence between SUd and SUu. 68 static bool hasDependence(const SUnit *SUd, const SUnit *SUu, 69 const HexagonInstrInfo &QII) { 70 if (SUd->Succs.size() == 0) 71 return false; 72 73 // Enable .cur formation. 74 if (QII.mayBeCurLoad(*SUd->getInstr())) 75 return false; 76 77 if (QII.canExecuteInBundle(*SUd->getInstr(), *SUu->getInstr())) 78 return false; 79 80 for (const auto &S : SUd->Succs) { 81 // Since we do not add pseudos to packets, might as well 82 // ignore order dependencies. 83 if (S.isCtrl()) 84 continue; 85 86 if (S.getSUnit() == SUu && S.getLatency() > 0) 87 return true; 88 } 89 return false; 90 } 91 92 /// Check if scheduling of this SU is possible 93 /// in the current packet. 94 /// It is _not_ precise (statefull), it is more like 95 /// another heuristic. Many corner cases are figured 96 /// empirically. 97 bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) { 98 if (!SU || !SU->getInstr()) 99 return false; 100 101 // First see if the pipeline could receive this instruction 102 // in the current cycle. 103 switch (SU->getInstr()->getOpcode()) { 104 default: 105 if (!ResourcesModel->canReserveResources(*SU->getInstr())) 106 return false; 107 break; 108 case TargetOpcode::EXTRACT_SUBREG: 109 case TargetOpcode::INSERT_SUBREG: 110 case TargetOpcode::SUBREG_TO_REG: 111 case TargetOpcode::REG_SEQUENCE: 112 case TargetOpcode::IMPLICIT_DEF: 113 case TargetOpcode::COPY: 114 case TargetOpcode::INLINEASM: 115 case TargetOpcode::INLINEASM_BR: 116 break; 117 } 118 119 MachineBasicBlock *MBB = SU->getInstr()->getParent(); 120 auto &QST = MBB->getParent()->getSubtarget<HexagonSubtarget>(); 121 const auto &QII = *QST.getInstrInfo(); 122 123 // Now see if there are no other dependencies to instructions already 124 // in the packet. 125 if (IsTop) { 126 for (unsigned i = 0, e = Packet.size(); i != e; ++i) 127 if (hasDependence(Packet[i], SU, QII)) 128 return false; 129 } else { 130 for (unsigned i = 0, e = Packet.size(); i != e; ++i) 131 if (hasDependence(SU, Packet[i], QII)) 132 return false; 133 } 134 return true; 135 } 136 137 /// Keep track of available resources. 138 bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) { 139 bool startNewCycle = false; 140 // Artificially reset state. 141 if (!SU) { 142 ResourcesModel->clearResources(); 143 Packet.clear(); 144 TotalPackets++; 145 return false; 146 } 147 // If this SU does not fit in the packet or the packet is now full 148 // start a new one. 149 if (!isResourceAvailable(SU, IsTop) || 150 Packet.size() >= SchedModel->getIssueWidth()) { 151 ResourcesModel->clearResources(); 152 Packet.clear(); 153 TotalPackets++; 154 startNewCycle = true; 155 } 156 157 switch (SU->getInstr()->getOpcode()) { 158 default: 159 ResourcesModel->reserveResources(*SU->getInstr()); 160 break; 161 case TargetOpcode::EXTRACT_SUBREG: 162 case TargetOpcode::INSERT_SUBREG: 163 case TargetOpcode::SUBREG_TO_REG: 164 case TargetOpcode::REG_SEQUENCE: 165 case TargetOpcode::IMPLICIT_DEF: 166 case TargetOpcode::KILL: 167 case TargetOpcode::CFI_INSTRUCTION: 168 case TargetOpcode::EH_LABEL: 169 case TargetOpcode::COPY: 170 case TargetOpcode::INLINEASM: 171 case TargetOpcode::INLINEASM_BR: 172 break; 173 } 174 Packet.push_back(SU); 175 176 #ifndef NDEBUG 177 LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); 178 for (unsigned i = 0, e = Packet.size(); i != e; ++i) { 179 LLVM_DEBUG(dbgs() << "\t[" << i << "] SU("); 180 LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); 181 LLVM_DEBUG(Packet[i]->getInstr()->dump()); 182 } 183 #endif 184 185 return startNewCycle; 186 } 187 188 /// schedule - Called back from MachineScheduler::runOnMachineFunction 189 /// after setting up the current scheduling region. [RegionBegin, RegionEnd) 190 /// only includes instructions that have DAG nodes, not scheduling boundaries. 191 void VLIWMachineScheduler::schedule() { 192 LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW " 193 << printMBBReference(*BB) << " " << BB->getName() 194 << " in_func " << BB->getParent()->getName() 195 << " at loop depth " << MLI->getLoopDepth(BB) << " \n"); 196 197 buildDAGWithRegPressure(); 198 199 Topo.InitDAGTopologicalSorting(); 200 201 // Postprocess the DAG to add platform-specific artificial dependencies. 202 postprocessDAG(); 203 204 SmallVector<SUnit*, 8> TopRoots, BotRoots; 205 findRootsAndBiasEdges(TopRoots, BotRoots); 206 207 // Initialize the strategy before modifying the DAG. 208 SchedImpl->initialize(this); 209 210 LLVM_DEBUG(unsigned maxH = 0; 211 for (unsigned su = 0, e = SUnits.size(); su != e; 212 ++su) if (SUnits[su].getHeight() > maxH) maxH = 213 SUnits[su].getHeight(); 214 dbgs() << "Max Height " << maxH << "\n";); 215 LLVM_DEBUG(unsigned maxD = 0; 216 for (unsigned su = 0, e = SUnits.size(); su != e; 217 ++su) if (SUnits[su].getDepth() > maxD) maxD = 218 SUnits[su].getDepth(); 219 dbgs() << "Max Depth " << maxD << "\n";); 220 LLVM_DEBUG(dump()); 221 222 initQueues(TopRoots, BotRoots); 223 224 bool IsTopNode = false; 225 while (true) { 226 LLVM_DEBUG( 227 dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); 228 SUnit *SU = SchedImpl->pickNode(IsTopNode); 229 if (!SU) break; 230 231 if (!checkSchedLimit()) 232 break; 233 234 scheduleMI(SU, IsTopNode); 235 236 // Notify the scheduling strategy after updating the DAG. 237 SchedImpl->schedNode(SU, IsTopNode); 238 239 updateQueues(SU, IsTopNode); 240 } 241 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); 242 243 placeDebugValues(); 244 245 LLVM_DEBUG({ 246 dbgs() << "*** Final schedule for " 247 << printMBBReference(*begin()->getParent()) << " ***\n"; 248 dumpSchedule(); 249 dbgs() << '\n'; 250 }); 251 } 252 253 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { 254 DAG = static_cast<VLIWMachineScheduler*>(dag); 255 SchedModel = DAG->getSchedModel(); 256 257 Top.init(DAG, SchedModel); 258 Bot.init(DAG, SchedModel); 259 260 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 261 // are disabled, then these HazardRecs will be disabled. 262 const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); 263 const TargetSubtargetInfo &STI = DAG->MF.getSubtarget(); 264 const TargetInstrInfo *TII = STI.getInstrInfo(); 265 delete Top.HazardRec; 266 delete Bot.HazardRec; 267 Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); 268 Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); 269 270 delete Top.ResourceModel; 271 delete Bot.ResourceModel; 272 Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); 273 Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); 274 275 const std::vector<unsigned> &MaxPressure = 276 DAG->getRegPressure().MaxSetPressure; 277 HighPressureSets.assign(MaxPressure.size(), 0); 278 for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) { 279 unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i); 280 HighPressureSets[i] = 281 ((float) MaxPressure[i] > ((float) Limit * RPThreshold)); 282 } 283 284 assert((!ForceTopDown || !ForceBottomUp) && 285 "-misched-topdown incompatible with -misched-bottomup"); 286 } 287 288 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { 289 if (SU->isScheduled) 290 return; 291 292 for (const SDep &PI : SU->Preds) { 293 unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle; 294 unsigned MinLatency = PI.getLatency(); 295 #ifndef NDEBUG 296 Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); 297 #endif 298 if (SU->TopReadyCycle < PredReadyCycle + MinLatency) 299 SU->TopReadyCycle = PredReadyCycle + MinLatency; 300 } 301 Top.releaseNode(SU, SU->TopReadyCycle); 302 } 303 304 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { 305 if (SU->isScheduled) 306 return; 307 308 assert(SU->getInstr() && "Scheduled SUnit must have instr"); 309 310 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 311 I != E; ++I) { 312 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; 313 unsigned MinLatency = I->getLatency(); 314 #ifndef NDEBUG 315 Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); 316 #endif 317 if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) 318 SU->BotReadyCycle = SuccReadyCycle + MinLatency; 319 } 320 Bot.releaseNode(SU, SU->BotReadyCycle); 321 } 322 323 /// Does this SU have a hazard within the current instruction group. 324 /// 325 /// The scheduler supports two modes of hazard recognition. The first is the 326 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that 327 /// supports highly complicated in-order reservation tables 328 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. 329 /// 330 /// The second is a streamlined mechanism that checks for hazards based on 331 /// simple counters that the scheduler itself maintains. It explicitly checks 332 /// for instruction dispatch limitations, including the number of micro-ops that 333 /// can dispatch per cycle. 334 /// 335 /// TODO: Also check whether the SU must start a new group. 336 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { 337 if (HazardRec->isEnabled()) 338 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; 339 340 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); 341 if (IssueCount + uops > SchedModel->getIssueWidth()) 342 return true; 343 344 return false; 345 } 346 347 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU, 348 unsigned ReadyCycle) { 349 if (ReadyCycle < MinReadyCycle) 350 MinReadyCycle = ReadyCycle; 351 352 // Check for interlocks first. For the purpose of other heuristics, an 353 // instruction that cannot issue appears as if it's not in the ReadyQueue. 354 if (ReadyCycle > CurrCycle || checkHazard(SU)) 355 356 Pending.push(SU); 357 else 358 Available.push(SU); 359 } 360 361 /// Move the boundary of scheduled code by one cycle. 362 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { 363 unsigned Width = SchedModel->getIssueWidth(); 364 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; 365 366 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && 367 "MinReadyCycle uninitialized"); 368 unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); 369 370 if (!HazardRec->isEnabled()) { 371 // Bypass HazardRec virtual calls. 372 CurrCycle = NextCycle; 373 } else { 374 // Bypass getHazardType calls in case of long latency. 375 for (; CurrCycle != NextCycle; ++CurrCycle) { 376 if (isTop()) 377 HazardRec->AdvanceCycle(); 378 else 379 HazardRec->RecedeCycle(); 380 } 381 } 382 CheckPending = true; 383 384 LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " 385 << CurrCycle << '\n'); 386 } 387 388 /// Move the boundary of scheduled code by one SUnit. 389 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { 390 bool startNewCycle = false; 391 392 // Update the reservation table. 393 if (HazardRec->isEnabled()) { 394 if (!isTop() && SU->isCall) { 395 // Calls are scheduled with their preceding instructions. For bottom-up 396 // scheduling, clear the pipeline state before emitting. 397 HazardRec->Reset(); 398 } 399 HazardRec->EmitInstruction(SU); 400 } 401 402 // Update DFA model. 403 startNewCycle = ResourceModel->reserveResources(SU, isTop()); 404 405 // Check the instruction group dispatch limit. 406 // TODO: Check if this SU must end a dispatch group. 407 IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); 408 if (startNewCycle) { 409 LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); 410 bumpCycle(); 411 } 412 else 413 LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle " 414 << CurrCycle << '\n'); 415 } 416 417 /// Release pending ready nodes in to the available queue. This makes them 418 /// visible to heuristics. 419 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() { 420 // If the available queue is empty, it is safe to reset MinReadyCycle. 421 if (Available.empty()) 422 MinReadyCycle = std::numeric_limits<unsigned>::max(); 423 424 // Check to see if any of the pending instructions are ready to issue. If 425 // so, add them to the available queue. 426 for (unsigned i = 0, e = Pending.size(); i != e; ++i) { 427 SUnit *SU = *(Pending.begin()+i); 428 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; 429 430 if (ReadyCycle < MinReadyCycle) 431 MinReadyCycle = ReadyCycle; 432 433 if (ReadyCycle > CurrCycle) 434 continue; 435 436 if (checkHazard(SU)) 437 continue; 438 439 Available.push(SU); 440 Pending.remove(Pending.begin()+i); 441 --i; --e; 442 } 443 CheckPending = false; 444 } 445 446 /// Remove SU from the ready set for this boundary. 447 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { 448 if (Available.isInQueue(SU)) 449 Available.remove(Available.find(SU)); 450 else { 451 assert(Pending.isInQueue(SU) && "bad ready count"); 452 Pending.remove(Pending.find(SU)); 453 } 454 } 455 456 /// If this queue only has one ready candidate, return it. As a side effect, 457 /// advance the cycle until at least one node is ready. If multiple instructions 458 /// are ready, return NULL. 459 SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { 460 if (CheckPending) 461 releasePending(); 462 463 auto AdvanceCycle = [this]() { 464 if (Available.empty()) 465 return true; 466 if (Available.size() == 1 && Pending.size() > 0) 467 return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) || 468 getWeakLeft(*Available.begin(), isTop()) != 0; 469 return false; 470 }; 471 for (unsigned i = 0; AdvanceCycle(); ++i) { 472 assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && 473 "permanent hazard"); (void)i; 474 ResourceModel->reserveResources(nullptr, isTop()); 475 bumpCycle(); 476 releasePending(); 477 } 478 if (Available.size() == 1) 479 return *Available.begin(); 480 return nullptr; 481 } 482 483 #ifndef NDEBUG 484 void ConvergingVLIWScheduler::traceCandidate(const char *Label, 485 const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) { 486 dbgs() << Label << " " << Q.getName() << " "; 487 if (P.isValid()) 488 dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" 489 << P.getUnitInc() << " "; 490 else 491 dbgs() << " "; 492 dbgs() << "cost(" << Cost << ")\t"; 493 DAG->dumpNode(*SU); 494 } 495 496 // Very detailed queue dump, to be used with higher verbosity levels. 497 void ConvergingVLIWScheduler::readyQueueVerboseDump( 498 const RegPressureTracker &RPTracker, SchedCandidate &Candidate, 499 ReadyQueue &Q) { 500 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); 501 502 dbgs() << ">>> " << Q.getName() << "\n"; 503 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 504 RegPressureDelta RPDelta; 505 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, 506 DAG->getRegionCriticalPSets(), 507 DAG->getRegPressure().MaxSetPressure); 508 std::stringstream dbgstr; 509 dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")"; 510 dbgs() << dbgstr.str(); 511 SchedulingCost(Q, *I, Candidate, RPDelta, true); 512 dbgs() << "\t"; 513 (*I)->getInstr()->dump(); 514 } 515 dbgs() << "\n"; 516 } 517 #endif 518 519 /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor 520 /// of SU, return true (we may have duplicates) 521 static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { 522 if (SU->NumPredsLeft == 0) 523 return false; 524 525 for (auto &Pred : SU->Preds) { 526 // We found an available, but not scheduled, predecessor. 527 if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) 528 return false; 529 } 530 531 return true; 532 } 533 534 /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor 535 /// of SU, return true (we may have duplicates) 536 static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { 537 if (SU->NumSuccsLeft == 0) 538 return false; 539 540 for (auto &Succ : SU->Succs) { 541 // We found an available, but not scheduled, successor. 542 if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) 543 return false; 544 } 545 return true; 546 } 547 548 /// Check if the instruction changes the register pressure of a register in the 549 /// high pressure set. The function returns a negative value if the pressure 550 /// decreases and a positive value is the pressure increases. If the instruction 551 /// doesn't use a high pressure register or doesn't change the register 552 /// pressure, then return 0. 553 int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) { 554 PressureDiff &PD = DAG->getPressureDiff(SU); 555 for (auto &P : PD) { 556 if (!P.isValid()) 557 continue; 558 // The pressure differences are computed bottom-up, so the comparision for 559 // an increase is positive in the bottom direction, but negative in the 560 // top-down direction. 561 if (HighPressureSets[P.getPSet()]) 562 return (isBotUp ? P.getUnitInc() : -P.getUnitInc()); 563 } 564 return 0; 565 } 566 567 // Constants used to denote relative importance of 568 // heuristic components for cost computation. 569 static const unsigned PriorityOne = 200; 570 static const unsigned PriorityTwo = 50; 571 static const unsigned PriorityThree = 75; 572 static const unsigned ScaleTwo = 10; 573 574 /// Single point to compute overall scheduling cost. 575 /// TODO: More heuristics will be used soon. 576 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, 577 SchedCandidate &Candidate, 578 RegPressureDelta &Delta, 579 bool verbose) { 580 // Initial trivial priority. 581 int ResCount = 1; 582 583 // Do not waste time on a node that is already scheduled. 584 if (!SU || SU->isScheduled) 585 return ResCount; 586 587 LLVM_DEBUG(if (verbose) dbgs() 588 << ((Q.getID() == TopQID) ? "(top|" : "(bot|")); 589 // Forced priority is high. 590 if (SU->isScheduleHigh) { 591 ResCount += PriorityOne; 592 LLVM_DEBUG(dbgs() << "H|"); 593 } 594 595 unsigned IsAvailableAmt = 0; 596 // Critical path first. 597 if (Q.getID() == TopQID) { 598 if (Top.isLatencyBound(SU)) { 599 LLVM_DEBUG(if (verbose) dbgs() << "LB|"); 600 ResCount += (SU->getHeight() * ScaleTwo); 601 } 602 603 LLVM_DEBUG(if (verbose) { 604 std::stringstream dbgstr; 605 dbgstr << "h" << std::setw(3) << SU->getHeight() << "|"; 606 dbgs() << dbgstr.str(); 607 }); 608 609 // If resources are available for it, multiply the 610 // chance of scheduling. 611 if (Top.ResourceModel->isResourceAvailable(SU, true)) { 612 IsAvailableAmt = (PriorityTwo + PriorityThree); 613 ResCount += IsAvailableAmt; 614 LLVM_DEBUG(if (verbose) dbgs() << "A|"); 615 } else 616 LLVM_DEBUG(if (verbose) dbgs() << " |"); 617 } else { 618 if (Bot.isLatencyBound(SU)) { 619 LLVM_DEBUG(if (verbose) dbgs() << "LB|"); 620 ResCount += (SU->getDepth() * ScaleTwo); 621 } 622 623 LLVM_DEBUG(if (verbose) { 624 std::stringstream dbgstr; 625 dbgstr << "d" << std::setw(3) << SU->getDepth() << "|"; 626 dbgs() << dbgstr.str(); 627 }); 628 629 // If resources are available for it, multiply the 630 // chance of scheduling. 631 if (Bot.ResourceModel->isResourceAvailable(SU, false)) { 632 IsAvailableAmt = (PriorityTwo + PriorityThree); 633 ResCount += IsAvailableAmt; 634 LLVM_DEBUG(if (verbose) dbgs() << "A|"); 635 } else 636 LLVM_DEBUG(if (verbose) dbgs() << " |"); 637 } 638 639 unsigned NumNodesBlocking = 0; 640 if (Q.getID() == TopQID) { 641 // How many SUs does it block from scheduling? 642 // Look at all of the successors of this node. 643 // Count the number of nodes that 644 // this node is the sole unscheduled node for. 645 if (Top.isLatencyBound(SU)) 646 for (const SDep &SI : SU->Succs) 647 if (isSingleUnscheduledPred(SI.getSUnit(), SU)) 648 ++NumNodesBlocking; 649 } else { 650 // How many unscheduled predecessors block this node? 651 if (Bot.isLatencyBound(SU)) 652 for (const SDep &PI : SU->Preds) 653 if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) 654 ++NumNodesBlocking; 655 } 656 ResCount += (NumNodesBlocking * ScaleTwo); 657 658 LLVM_DEBUG(if (verbose) { 659 std::stringstream dbgstr; 660 dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|"; 661 dbgs() << dbgstr.str(); 662 }); 663 664 // Factor in reg pressure as a heuristic. 665 if (!IgnoreBBRegPressure) { 666 // Decrease priority by the amount that register pressure exceeds the limit. 667 ResCount -= (Delta.Excess.getUnitInc()*PriorityOne); 668 // Decrease priority if register pressure exceeds the limit. 669 ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne); 670 // Decrease priority slightly if register pressure would increase over the 671 // current maximum. 672 ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo); 673 // If there are register pressure issues, then we remove the value added for 674 // the instruction being available. The rationale is that we really don't 675 // want to schedule an instruction that causes a spill. 676 if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 && 677 (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() || 678 Delta.CurrentMax.getUnitInc())) 679 ResCount -= IsAvailableAmt; 680 LLVM_DEBUG(if (verbose) { 681 dbgs() << "RP " << Delta.Excess.getUnitInc() << "/" 682 << Delta.CriticalMax.getUnitInc() << "/" 683 << Delta.CurrentMax.getUnitInc() << ")|"; 684 }); 685 } 686 687 // Give a little extra priority to a .cur instruction if there is a resource 688 // available for it. 689 auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>(); 690 auto &QII = *QST.getInstrInfo(); 691 if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) { 692 if (Q.getID() == TopQID && 693 Top.ResourceModel->isResourceAvailable(SU, true)) { 694 ResCount += PriorityTwo; 695 LLVM_DEBUG(if (verbose) dbgs() << "C|"); 696 } else if (Q.getID() == BotQID && 697 Bot.ResourceModel->isResourceAvailable(SU, false)) { 698 ResCount += PriorityTwo; 699 LLVM_DEBUG(if (verbose) dbgs() << "C|"); 700 } 701 } 702 703 // Give preference to a zero latency instruction if the dependent 704 // instruction is in the current packet. 705 if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) { 706 for (const SDep &PI : SU->Preds) { 707 if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() && 708 PI.getLatency() == 0 && 709 Top.ResourceModel->isInPacket(PI.getSUnit())) { 710 ResCount += PriorityThree; 711 LLVM_DEBUG(if (verbose) dbgs() << "Z|"); 712 } 713 } 714 } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) { 715 for (const SDep &SI : SU->Succs) { 716 if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() && 717 SI.getLatency() == 0 && 718 Bot.ResourceModel->isInPacket(SI.getSUnit())) { 719 ResCount += PriorityThree; 720 LLVM_DEBUG(if (verbose) dbgs() << "Z|"); 721 } 722 } 723 } 724 725 // If the instruction has a non-zero latency dependence with an instruction in 726 // the current packet, then it should not be scheduled yet. The case occurs 727 // when the dependent instruction is scheduled in a new packet, so the 728 // scheduler updates the current cycle and pending instructions become 729 // available. 730 if (CheckEarlyAvail) { 731 if (Q.getID() == TopQID) { 732 for (const auto &PI : SU->Preds) { 733 if (PI.getLatency() > 0 && 734 Top.ResourceModel->isInPacket(PI.getSUnit())) { 735 ResCount -= PriorityOne; 736 LLVM_DEBUG(if (verbose) dbgs() << "D|"); 737 } 738 } 739 } else { 740 for (const auto &SI : SU->Succs) { 741 if (SI.getLatency() > 0 && 742 Bot.ResourceModel->isInPacket(SI.getSUnit())) { 743 ResCount -= PriorityOne; 744 LLVM_DEBUG(if (verbose) dbgs() << "D|"); 745 } 746 } 747 } 748 } 749 750 LLVM_DEBUG(if (verbose) { 751 std::stringstream dbgstr; 752 dbgstr << "Total " << std::setw(4) << ResCount << ")"; 753 dbgs() << dbgstr.str(); 754 }); 755 756 return ResCount; 757 } 758 759 /// Pick the best candidate from the top queue. 760 /// 761 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during 762 /// DAG building. To adjust for the current scheduling location we need to 763 /// maintain the number of vreg uses remaining to be top-scheduled. 764 ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler:: 765 pickNodeFromQueue(VLIWSchedBoundary &Zone, const RegPressureTracker &RPTracker, 766 SchedCandidate &Candidate) { 767 ReadyQueue &Q = Zone.Available; 768 LLVM_DEBUG(if (SchedDebugVerboseLevel > 1) 769 readyQueueVerboseDump(RPTracker, Candidate, Q); 770 else Q.dump();); 771 772 // getMaxPressureDelta temporarily modifies the tracker. 773 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); 774 775 // BestSU remains NULL if no top candidates beat the best existing candidate. 776 CandResult FoundCandidate = NoCand; 777 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 778 RegPressureDelta RPDelta; 779 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, 780 DAG->getRegionCriticalPSets(), 781 DAG->getRegPressure().MaxSetPressure); 782 783 int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); 784 785 // Initialize the candidate if needed. 786 if (!Candidate.SU) { 787 LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost)); 788 Candidate.SU = *I; 789 Candidate.RPDelta = RPDelta; 790 Candidate.SCost = CurrentCost; 791 FoundCandidate = NodeOrder; 792 continue; 793 } 794 795 // Choose node order for negative cost candidates. There is no good 796 // candidate in this case. 797 if (CurrentCost < 0 && Candidate.SCost < 0) { 798 if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) 799 || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { 800 LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost)); 801 Candidate.SU = *I; 802 Candidate.RPDelta = RPDelta; 803 Candidate.SCost = CurrentCost; 804 FoundCandidate = NodeOrder; 805 } 806 continue; 807 } 808 809 // Best cost. 810 if (CurrentCost > Candidate.SCost) { 811 LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost)); 812 Candidate.SU = *I; 813 Candidate.RPDelta = RPDelta; 814 Candidate.SCost = CurrentCost; 815 FoundCandidate = BestCost; 816 continue; 817 } 818 819 // Choose an instruction that does not depend on an artificial edge. 820 unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID)); 821 unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID)); 822 if (CurrWeak != CandWeak) { 823 if (CurrWeak < CandWeak) { 824 LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost)); 825 Candidate.SU = *I; 826 Candidate.RPDelta = RPDelta; 827 Candidate.SCost = CurrentCost; 828 FoundCandidate = Weak; 829 } 830 continue; 831 } 832 833 if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) { 834 unsigned CurrSize, CandSize; 835 if (Q.getID() == TopQID) { 836 CurrSize = (*I)->Succs.size(); 837 CandSize = Candidate.SU->Succs.size(); 838 } else { 839 CurrSize = (*I)->Preds.size(); 840 CandSize = Candidate.SU->Preds.size(); 841 } 842 if (CurrSize > CandSize) { 843 LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost)); 844 Candidate.SU = *I; 845 Candidate.RPDelta = RPDelta; 846 Candidate.SCost = CurrentCost; 847 FoundCandidate = BestCost; 848 } 849 // Keep the old candidate if it's a better candidate. That is, don't use 850 // the subsequent tie breaker. 851 if (CurrSize != CandSize) 852 continue; 853 } 854 855 // Tie breaker. 856 // To avoid scheduling indeterminism, we need a tie breaker 857 // for the case when cost is identical for two nodes. 858 if (UseNewerCandidate && CurrentCost == Candidate.SCost) { 859 if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) 860 || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { 861 LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost)); 862 Candidate.SU = *I; 863 Candidate.RPDelta = RPDelta; 864 Candidate.SCost = CurrentCost; 865 FoundCandidate = NodeOrder; 866 continue; 867 } 868 } 869 870 // Fall through to original instruction order. 871 // Only consider node order if Candidate was chosen from this Q. 872 if (FoundCandidate == NoCand) 873 continue; 874 } 875 return FoundCandidate; 876 } 877 878 /// Pick the best candidate node from either the top or bottom queue. 879 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { 880 // Schedule as far as possible in the direction of no choice. This is most 881 // efficient, but also provides the best heuristics for CriticalPSets. 882 if (SUnit *SU = Bot.pickOnlyChoice()) { 883 LLVM_DEBUG(dbgs() << "Picked only Bottom\n"); 884 IsTopNode = false; 885 return SU; 886 } 887 if (SUnit *SU = Top.pickOnlyChoice()) { 888 LLVM_DEBUG(dbgs() << "Picked only Top\n"); 889 IsTopNode = true; 890 return SU; 891 } 892 SchedCandidate BotCand; 893 // Prefer bottom scheduling when heuristics are silent. 894 CandResult BotResult = pickNodeFromQueue(Bot, 895 DAG->getBotRPTracker(), BotCand); 896 assert(BotResult != NoCand && "failed to find the first candidate"); 897 898 // If either Q has a single candidate that provides the least increase in 899 // Excess pressure, we can immediately schedule from that Q. 900 // 901 // RegionCriticalPSets summarizes the pressure within the scheduled region and 902 // affects picking from either Q. If scheduling in one direction must 903 // increase pressure for one of the excess PSets, then schedule in that 904 // direction first to provide more freedom in the other direction. 905 if (BotResult == SingleExcess || BotResult == SingleCritical) { 906 LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n"); 907 IsTopNode = false; 908 return BotCand.SU; 909 } 910 // Check if the top Q has a better candidate. 911 SchedCandidate TopCand; 912 CandResult TopResult = pickNodeFromQueue(Top, 913 DAG->getTopRPTracker(), TopCand); 914 assert(TopResult != NoCand && "failed to find the first candidate"); 915 916 if (TopResult == SingleExcess || TopResult == SingleCritical) { 917 LLVM_DEBUG(dbgs() << "Prefered Top Node\n"); 918 IsTopNode = true; 919 return TopCand.SU; 920 } 921 // If either Q has a single candidate that minimizes pressure above the 922 // original region's pressure pick it. 923 if (BotResult == SingleMax) { 924 LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n"); 925 IsTopNode = false; 926 return BotCand.SU; 927 } 928 if (TopResult == SingleMax) { 929 LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n"); 930 IsTopNode = true; 931 return TopCand.SU; 932 } 933 if (TopCand.SCost > BotCand.SCost) { 934 LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n"); 935 IsTopNode = true; 936 return TopCand.SU; 937 } 938 // Otherwise prefer the bottom candidate in node order. 939 LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n"); 940 IsTopNode = false; 941 return BotCand.SU; 942 } 943 944 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. 945 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { 946 if (DAG->top() == DAG->bottom()) { 947 assert(Top.Available.empty() && Top.Pending.empty() && 948 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); 949 return nullptr; 950 } 951 SUnit *SU; 952 if (ForceTopDown) { 953 SU = Top.pickOnlyChoice(); 954 if (!SU) { 955 SchedCandidate TopCand; 956 CandResult TopResult = 957 pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); 958 assert(TopResult != NoCand && "failed to find the first candidate"); 959 (void)TopResult; 960 SU = TopCand.SU; 961 } 962 IsTopNode = true; 963 } else if (ForceBottomUp) { 964 SU = Bot.pickOnlyChoice(); 965 if (!SU) { 966 SchedCandidate BotCand; 967 CandResult BotResult = 968 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); 969 assert(BotResult != NoCand && "failed to find the first candidate"); 970 (void)BotResult; 971 SU = BotCand.SU; 972 } 973 IsTopNode = false; 974 } else { 975 SU = pickNodeBidrectional(IsTopNode); 976 } 977 if (SU->isTopReady()) 978 Top.removeReady(SU); 979 if (SU->isBottomReady()) 980 Bot.removeReady(SU); 981 982 LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") 983 << " Scheduling instruction in cycle " 984 << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " (" 985 << reportPackets() << ")\n"; 986 DAG->dumpNode(*SU)); 987 return SU; 988 } 989 990 /// Update the scheduler's state after scheduling a node. This is the same node 991 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs 992 /// to update it's state based on the current cycle before MachineSchedStrategy 993 /// does. 994 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { 995 if (IsTopNode) { 996 Top.bumpNode(SU); 997 SU->TopReadyCycle = Top.CurrCycle; 998 } else { 999 Bot.bumpNode(SU); 1000 SU->BotReadyCycle = Bot.CurrCycle; 1001 } 1002 } 1003