Lines Matching +full:max +full:- +full:reason
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
51 #include "llvm/Config/llvm-config.h"
74 #define DEBUG_TYPE "machine-scheduler"
80 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
81 cl::desc("Force top-down list scheduling"));
82 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
83 cl::desc("Force bottom-up list scheduling"));
92 "misched-postra-direction", cl::Hidden,
93 cl::desc("Post reg-alloc list scheduling direction"),
94 // Default to top-down because it was implemented first and existing targets
99 "Force top-down post reg-alloc list scheduling"),
101 "Force bottom-up post reg-alloc list scheduling"),
103 "Force bidirectional post reg-alloc list scheduling")));
105 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
109 "verify-misched", cl::Hidden,
114 "view-misched-dags", cl::Hidden,
116 cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
119 "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
122 "misched-detail-resource-booking", cl::Hidden, cl::init(false),
138 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
141 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
144 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
146 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
152 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
155 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
158 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
161 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
165 ForceFastCluster("force-fast-cluster", cl::Hidden,
170 FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
176 "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
179 HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
184 ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
188 "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
193 MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
204 //===----------------------------------------------------------------------===//
206 //===----------------------------------------------------------------------===//
339 "enable-misched",
344 "enable-post-misched",
345 cl::desc("Enable the post-ra machine instruction scheduling pass."),
348 /// Decrement this iterator until reaching the top or a non-debug instr.
353 while (--I != Beg) { in priorNonDebug()
354 if (!I->isDebugOrPseudoInstr()) in priorNonDebug()
360 /// Non-const version.
369 /// non-debug instruction.
374 if (!I->isDebugOrPseudoInstr()) in nextIfDebug()
380 /// Non-const version.
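These two helpers appear only as fragments in the matches above. A minimal sketch of the debug-skipping iteration pattern they implement (assuming the MachineBasicBlock iterator API used elsewhere in this file; not a verbatim copy):

    static MachineBasicBlock::const_iterator
    priorNonDebug(MachineBasicBlock::const_iterator I,
                  MachineBasicBlock::const_iterator Beg) {
      // Step backward past debug/pseudo instructions, stopping at the
      // top of the region.
      while (--I != Beg) {
        if (!I->isDebugOrPseudoInstr())
          break;
      }
      return I;
    }

    static MachineBasicBlock::const_iterator
    nextIfDebug(MachineBasicBlock::const_iterator I,
                MachineBasicBlock::const_iterator End) {
      // Step forward until the first non-debug instruction (or End).
      for (; I != End; ++I) {
        if (!I->isDebugOrPseudoInstr())
          break;
      }
      return I;
    }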
396 ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this); in createMachineScheduler()
409 ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this); in createPostMachineScheduler()
417 /// Top-level MachineScheduler pass driver.
420 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
422 /// scheduling regions bottom-up.
455 LLVM_DEBUG(LIS->dump()); in runOnMachineFunction()
456 MF->verify(this, "Before machine scheduling."); in runOnMachineFunction()
458 RegClassInfo->runOnMachineFunction(*MF); in runOnMachineFunction()
470 Scheduler->setDumpDirection(D); in runOnMachineFunction()
473 LLVM_DEBUG(LIS->dump()); in runOnMachineFunction()
475 MF->verify(this, "After machine scheduling."); in runOnMachineFunction()
487 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); in runOnMachineFunction()
490 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs())); in runOnMachineFunction()
499 MF->verify(this, "Before post machine scheduling."); in runOnMachineFunction()
511 Scheduler->setDumpDirection(D); in runOnMachineFunction()
515 MF->verify(this, "After post machine scheduling."); in runOnMachineFunction()
533 return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF); in isSchedBoundary()
540 /// RegionEnd is either MBB->end() or the scheduling boundary after the
560 MachineFunction *MF = MBB->getParent(); in getSchedRegions()
561 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); in getSchedRegions()
564   for (MachineBasicBlock::iterator RegionEnd = MBB->end(); in getSchedRegions()
565 RegionEnd != MBB->begin(); RegionEnd = I) { in getSchedRegions()
568 if (RegionEnd != MBB->end() || in getSchedRegions()
570 --RegionEnd; in getSchedRegions()
577     for (; I != MBB->begin(); --I) { in getSchedRegions()
603 // TODO: Visit blocks in global postorder or postorder within the bottom-up in scheduleRegions()
605 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); in scheduleRegions()
611 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) in scheduleRegions()
614 && (int)SchedOnlyBlock != MBB->getNumber()) in scheduleRegions()
622 // no terminator then RegionEnd == MBB->end() for the bottom region. in scheduleRegions()
625 // will be processed (MBB) top-down if initialized with true. in scheduleRegions()
651 LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) in scheduleRegions()
652 << " " << MBB->getName() << "\n From: " << *I in scheduleRegions()
654 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; in scheduleRegions()
658 errs() << MF->getName(); in scheduleRegions()
659       errs() << ":%bb." << MBB->getNumber(); in scheduleRegions()
660       errs() << " " << MBB->getName() << "\n"; in scheduleRegions()
688 dbgs() << SU->NodeNum << " "; in dump()
693 //===----------------------------------------------------------------------===//
694 // ScheduleDAGMI - Basic machine instruction scheduling. This is
695 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
697 //===----------------------------------------------------------------------===//
702 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
707 SUnit *SuccSU = SuccEdge->getSUnit(); in releaseSucc()
709 if (SuccEdge->isWeak()) { in releaseSucc()
710 --SuccSU->WeakPredsLeft; in releaseSucc()
711 if (SuccEdge->isCluster()) in releaseSucc()
716 if (SuccSU->NumPredsLeft == 0) { in releaseSucc()
723 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However, in releaseSucc()
725 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency()) in releaseSucc()
726 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency(); in releaseSucc()
728 --SuccSU->NumPredsLeft; in releaseSucc()
729 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) in releaseSucc()
730 SchedImpl->releaseTopNode(SuccSU); in releaseSucc()
733 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
735 for (SDep &Succ : SU->Succs) in releaseSuccessors()
739 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
744 SUnit *PredSU = PredEdge->getSUnit(); in releasePred()
746 if (PredEdge->isWeak()) { in releasePred()
747 --PredSU->WeakSuccsLeft; in releasePred()
748 if (PredEdge->isCluster()) in releasePred()
753 if (PredSU->NumSuccsLeft == 0) { in releasePred()
760 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However, in releasePred()
762 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency()) in releasePred()
763 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency(); in releasePred()
765 --PredSU->NumSuccsLeft; in releasePred()
766 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) in releasePred()
767 SchedImpl->releaseBottomNode(PredSU); in releasePred()
770 /// releasePredecessors - Call releasePred on each of SU's predecessors.
772 for (SDep &Pred : SU->Preds) in releasePredecessors()
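Stitched together, the release logic above follows one pattern in both directions. A hedged sketch of the top-down half (simplified: the cluster bookkeeping and the underflow assert of the real function are omitted):

    void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
      SUnit *SuccSU = SuccEdge->getSUnit();
      // Weak edges never gate readiness; they only adjust a separate counter.
      if (SuccEdge->isWeak()) {
        --SuccSU->WeakPredsLeft;
        return;
      }
      // Raise the successor's ready cycle to cover this edge's latency.
      if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
        SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
      // The successor becomes schedulable once its last strong pred is done.
      if (--SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
        SchedImpl->releaseTopNode(SuccSU);
    }

releasePred() mirrors this bottom-up, counting down NumSuccsLeft and raising BotReadyCycle instead.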
778 SchedImpl->enterMBB(bb); in startBlock()
782 SchedImpl->leaveMBB(); in finishBlock()
786 /// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
788 /// in the region, including the boundary itself and single-instruction regions
797 SchedImpl->initPolicy(begin, end, regioninstrs); in enterRegion()
809 BB->splice(InsertPos, BB, MI); in moveInstruction()
813 LIS->handleMove(*MI, /*UpdateFlags=*/true); in moveInstruction()
831 /// Per-region scheduling driver, called back from
837 LLVM_DEBUG(SchedImpl->dumpPolicy()); in schedule()
853 SchedImpl->initialize(this); in schedule()
861 SUnit *SU = SchedImpl->pickNode(IsTopNode); in schedule()
864 assert(!SU->isScheduled && "Node already scheduled"); in schedule()
868 MachineInstr *MI = SU->getInstr(); in schedule()
870 assert(SU->isTopReady() && "node still has unscheduled dependencies"); in schedule()
876 assert(SU->isBottomReady() && "node still has unscheduled dependencies"); in schedule()
892 SchedImpl->schedNode(SU, IsTopNode); in schedule()
902 << printMBBReference(*begin()->getParent()) << " ***\n"; in schedule()
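The matched lines of ScheduleDAGMI::schedule() outline the main list-scheduling loop. Reduced to a skeleton (hedged; the instruction splicing and direction handling are elided):

    SchedImpl->initialize(this);
    bool IsTopNode = false;
    while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
      assert(!SU->isScheduled && "Node already scheduled");
      // Splice SU's instruction to the current top or bottom insertion
      // point (details elided), then let the strategy observe the pick
      // and release the newly ready dependents.
      SchedImpl->schedNode(SU, IsTopNode);
      updateQueues(SU, IsTopNode);
    }
    placeDebugValues();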
911 m->apply(this); in postProcessDAG()
944 SchedImpl->releaseTopNode(SU); in initQueues()
950 SchedImpl->releaseBottomNode(*I); in initQueues()
956 SchedImpl->registerRoots(); in initQueues()
971 SU->isScheduled = true; in updateQueues()
978 BB->splice(RegionBegin, BB, FirstDbgValue); in placeDebugValues()
983 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { in placeDebugValues()
989 BB->splice(std::next(OrigPrevMI), BB, DbgValue); in placeDebugValues()
990 if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd) in placeDebugValues()
1004 if (BB->size() < 2) in dumpScheduleTraceTopDown()
1009 const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle; in dumpScheduleTraceTopDown()
1010 unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle; in dumpScheduleTraceTopDown()
1019 if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle) in dumpScheduleTraceTopDown()
1020 LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1; in dumpScheduleTraceTopDown()
1036 NodeName += std::to_string(SU->NodeNum) + ")"; in dumpScheduleTraceTopDown()
1040 if (C == SU->TopReadyCycle) in dumpScheduleTraceTopDown()
1055 const MCWriteProcResEntry &RHS) -> bool { in dumpScheduleTraceTopDown()
1065 for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) { in dumpScheduleTraceTopDown()
1068 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E; in dumpScheduleTraceTopDown()
1085 if (BB->size() < 2) in dumpScheduleTraceBottomUp()
1091 const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle; in dumpScheduleTraceBottomUp()
1092 int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle; in dumpScheduleTraceBottomUp()
1101 if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle) in dumpScheduleTraceBottomUp()
1102 LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1; in dumpScheduleTraceBottomUp()
1107 for (int C = FirstCycle; C >= LastCycle; --C) in dumpScheduleTraceBottomUp()
1118 NodeName += std::to_string(SU->NodeNum) + ")"; in dumpScheduleTraceBottomUp()
1121 for (; C >= LastCycle; --C) { in dumpScheduleTraceBottomUp()
1122 if (C == (int)SU->BotReadyCycle) in dumpScheduleTraceBottomUp()
1136 const MCWriteProcResEntry &RHS) -> bool { in dumpScheduleTraceBottomUp()
1146 for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) { in dumpScheduleTraceBottomUp()
1149 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E; in dumpScheduleTraceBottomUp()
1150 ++I, --C) in dumpScheduleTraceBottomUp()
1152 while (C-- >= LastCycle) in dumpScheduleTraceBottomUp()
1184 //===----------------------------------------------------------------------===//
1185 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1187 //===----------------------------------------------------------------------===//
1207 // Ignore re-defs. in collectVRegUses()
1223 if (UI->SU == &SU) in collectVRegUses()
1231 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1233 /// the region, including the boundary itself and single-instruction regions
1240 // ScheduleDAGMI initializes SchedImpl's per-region policy. in enterRegion()
1244 LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd); in enterRegion()
1248 ShouldTrackPressure = SchedImpl->shouldTrackPressure(); in enterRegion()
1249 ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks(); in enterRegion()
1291 // uses of the same vreg below the live-out reaching def. in initRegPressure()
1307 (RegionEnd->isDebugInstr() && in initRegPressure()
1312 // the max pressure in the scheduled code for these sets. in initRegPressure()
1317 unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); in initRegPressure()
1319 LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit in initRegPressure()
1327 << TRI->getRegPressureSetName(RCPS.getPSet()) << " "; in initRegPressure()
1344 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max()) in updateScheduledPressure()
1347 unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); in updateScheduledPressure()
1348 if (NewMaxPressure[ID] >= Limit - 2) { in updateScheduledPressure()
1349 LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " in updateScheduledPressure()
1364 /// FIXME: Currently assuming single-use physregs. in updatePressureDiffs()
1394 // instruction's live-out. in updatePressureDiffs()
1395 const LiveInterval &LI = LIS->getInterval(Reg); in updatePressureDiffs()
1398 nextIfDebug(BotRPTracker.getPos(), BB->end()); in updatePressureDiffs()
1399 if (I == BB->end()) in updatePressureDiffs()
1400 VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); in updatePressureDiffs()
1402 LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I)); in updatePressureDiffs()
1412 if (!SU->isScheduled && SU != &ExitSU) { in updatePressureDiffs()
1414 LI.Query(LIS->getInstructionIndex(*SU->getInstr())); in updatePressureDiffs()
1418 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " in updatePressureDiffs()
1419 << *SU->getInstr(); in updatePressureDiffs()
1451 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1463 LLVM_DEBUG(SchedImpl->dumpPolicy()); in schedule()
1473 SchedImpl->initialize(this); in schedule()
1485 SUnit *SU = SchedImpl->pickNode(IsTopNode); in schedule()
1488 assert(!SU->isScheduled && "Node already scheduled"); in schedule()
1495 unsigned SubtreeID = DFSResult->getSubtreeID(SU); in schedule()
1498 DFSResult->scheduleTree(SubtreeID); in schedule()
1499 SchedImpl->scheduleTree(SubtreeID); in schedule()
1504 SchedImpl->schedNode(SU, IsTopNode); in schedule()
1514 << printMBBReference(*begin()->getParent()) << " ***\n"; in schedule()
1547 DFSResult->clear(); in computeDFSResult()
1549 DFSResult->resize(SUnits.size()); in computeDFSResult()
1550 DFSResult->compute(SUnits); in computeDFSResult()
1551 ScheduledTrees.resize(DFSResult->getNumSubtrees()); in computeDFSResult()
1554 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1560 /// The cyclic path estimation identifies a def-use pair that crosses the back
1565 /// a->b(a,c)->c(b)->d(c)->exit
1567 /// The cyclic critical path is two cycles: b->c->b
1568 /// The acyclic critical path is four cycles: a->b->c->d->exit
1569 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1570 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1571 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1572 /// LiveInDepth = depth(b) = len(a->b) = 1
1574 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1575 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
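Expressed in the comment's own names, the two bounds combine as a single min (a sketch; the real function, visible in the matches at lines 1619-1625, uses the equivalent pair of guarded subtractions):

    // CyclicLatency = min(LiveOutDepth - LiveInDepth,
    //                     LiveInHeight - LiveOutHeight)
    //               = min(3 - 1, 4 - 2) = 2, the two-cycle path b->c->b.
    unsigned CyclicLatency = std::min(LiveOutDepth - LiveInDepth,
                                      LiveInHeight - LiveOutHeight);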
1582 if (!BB->isSuccessor(BB)) in computeCyclicCriticalPath()
1591 const LiveInterval &LI = LIS->getInterval(Reg); in computeCyclicCriticalPath()
1592 const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); in computeCyclicCriticalPath()
1596 MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def); in computeCyclicCriticalPath()
1601 unsigned LiveOutHeight = DefSU->getHeight(); in computeCyclicCriticalPath()
1602 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; in computeCyclicCriticalPath()
1611 LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr())); in computeCyclicCriticalPath()
1612 if (!LRQ.valueIn()->isPHIDef()) in computeCyclicCriticalPath()
1619 if (LiveOutDepth > SU->getDepth()) in computeCyclicCriticalPath()
1620 CyclicLatency = LiveOutDepth - SU->getDepth(); in computeCyclicCriticalPath()
1622 unsigned LiveInHeight = SU->getHeight() + DefSU->Latency; in computeCyclicCriticalPath()
1624 if (LiveInHeight - LiveOutHeight < CyclicLatency) in computeCyclicCriticalPath()
1625 CyclicLatency = LiveInHeight - LiveOutHeight; in computeCyclicCriticalPath()
1629 LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" in computeCyclicCriticalPath()
1630 << SU->NodeNum << ") = " << CyclicLatency << "c\n"); in computeCyclicCriticalPath()
1639 /// Release ExitSU predecessors and set up scheduler queues. Re-position
1653 MachineInstr *MI = SU->getInstr(); in scheduleMI()
1656 assert(SU->isTopReady() && "node still has unscheduled dependencies"); in scheduleMI()
1670 // Adjust liveness and add missing dead+read-undef flags. in scheduleMI()
1671 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); in scheduleMI()
1674 // Adjust for missing dead-def flags. in scheduleMI()
1686 assert(SU->isBottomReady() && "node still has unscheduled dependencies"); in scheduleMI()
1705 // Adjust liveness and add missing dead+read-undef flags. in scheduleMI()
1706 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); in scheduleMI()
1709 // Adjust for missing dead-def flags. in scheduleMI()
1727 //===----------------------------------------------------------------------===//
1728 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1729 //===----------------------------------------------------------------------===//
1733 /// Post-process the DAG to create cluster edges between neighboring
1750 if (A->getType() != B->getType()) in Compare()
1751 return A->getType() < B->getType(); in Compare()
1752 if (A->isReg()) in Compare()
1753 return A->getReg() < B->getReg(); in Compare()
1754 if (A->isFI()) { in Compare()
1755 const MachineFunction &MF = *A->getParent()->getParent()->getParent(); in Compare()
1759 return StackGrowsDown ? A->getIndex() > B->getIndex() in Compare()
1760 : A->getIndex() < B->getIndex(); in Compare()
1779 return SU->NodeNum < RHS.SU->NodeNum; in operator <()
1846 // following load/store one by one, until we reach the first non-dependent one and in clusterNeighboringMemOps()
1858 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { in clusterNeighboringMemOps()
1867 if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) && in clusterNeighboringMemOps()
1869 (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) && in clusterNeighboringMemOps()
1870 !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU)))) in clusterNeighboringMemOps()
1879 if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) { in clusterNeighboringMemOps()
1880 ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1; in clusterNeighboringMemOps()
1881 CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + in clusterNeighboringMemOps()
1885 if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset, in clusterNeighboringMemOps()
1893 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum) in clusterNeighboringMemOps()
1897 if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) in clusterNeighboringMemOps()
1900 LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" in clusterNeighboringMemOps()
1901 << SUb->NodeNum << ")\n"); in clusterNeighboringMemOps()
1909 for (const SDep &Succ : SUa->Succs) { in clusterNeighboringMemOps()
1912 LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum in clusterNeighboringMemOps()
1914 DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); in clusterNeighboringMemOps()
1918       // SUb depends on being scheduled in-between SUb and SUa. Successor edges in clusterNeighboringMemOps()
1923 for (const SDep &Pred : SUb->Preds) { in clusterNeighboringMemOps()
1926 LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum in clusterNeighboringMemOps()
1928 DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial)); in clusterNeighboringMemOps()
1932 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength, in clusterNeighboringMemOps()
1944 if ((IsLoad && !SU.getInstr()->mayLoad()) || in collectMemOpRecords()
1945 (!IsLoad && !SU.getInstr()->mayStore())) in collectMemOpRecords()
1953 if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, in collectMemOpRecords()
1974 MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold; in groupMemOps()
1977 unsigned ChainPredID = DAG->SUnits.size(); in groupMemOps()
1979 for (const SDep &Pred : MemOp.SU->Preds) { in groupMemOps()
1980     // We only want to cluster the mem ops that have the same control (non-data) in groupMemOps()
1985 (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) && in groupMemOps()
1987 ChainPredID = Pred.getSUnit()->NodeNum; in groupMemOps()
2003 collectMemOpRecords(DAG->SUnits, MemOpRecords); in apply()
2024 //===----------------------------------------------------------------------===//
2025 // CopyConstrain - DAG post-processing to encourage copy elimination.
2026 //===----------------------------------------------------------------------===//
2030 /// Post-process the DAG to create weak edges from all uses of a copy to
2037 // RegionEndIdx is the slot index of the last non-debug instruction in the
2068 /// (create pred->succ edges I0->I1, I2->I1)
2075 /// (create pred->succ edges I1->I2, I3->I2)
2082 LiveIntervals *LIS = DAG->getLIS(); in constrainLocalCopy()
2083 MachineInstr *Copy = CopySU->getInstr(); in constrainLocalCopy()
2086 const MachineOperand &SrcOp = Copy->getOperand(1); in constrainLocalCopy()
2091 const MachineOperand &DstOp = Copy->getOperand(0); in constrainLocalCopy()
2104 LiveInterval *LocalLI = &LIS->getInterval(LocalReg); in constrainLocalCopy()
2105 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { in constrainLocalCopy()
2108 LocalLI = &LIS->getInterval(LocalReg); in constrainLocalCopy()
2109 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) in constrainLocalCopy()
2112 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg); in constrainLocalCopy()
2115 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex()); in constrainLocalCopy()
2116 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a in constrainLocalCopy()
2120 if (GlobalSegment == GlobalLI->end()) in constrainLocalCopy()
2123 // If GlobalSegment is killed at the LocalLI->start, the call to find() in constrainLocalCopy()
2125 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI in constrainLocalCopy()
2127 if (GlobalSegment->contains(LocalLI->beginIndex())) in constrainLocalCopy()
2130 if (GlobalSegment == GlobalLI->end()) in constrainLocalCopy()
2134 if (GlobalSegment != GlobalLI->begin()) { in constrainLocalCopy()
2136 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end, in constrainLocalCopy()
2137 GlobalSegment->start)) { in constrainLocalCopy()
2140 // If the prior global segment may be defined by the same two-address in constrainLocalCopy()
2142 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start, in constrainLocalCopy()
2143 LocalLI->beginIndex())) { in constrainLocalCopy()
2148 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() && in constrainLocalCopy()
2151 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start); in constrainLocalCopy()
2155 SUnit *GlobalSU = DAG->getSUnit(GlobalDef); in constrainLocalCopy()
2162 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex()); in constrainLocalCopy()
2163 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); in constrainLocalCopy()
2164 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef); in constrainLocalCopy()
2165 for (const SDep &Succ : LastLocalSU->Succs) { in constrainLocalCopy()
2170 if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit())) in constrainLocalCopy()
2178 LIS->getInstructionFromIndex(LocalLI->beginIndex()); in constrainLocalCopy()
2179 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef); in constrainLocalCopy()
2180 for (const SDep &Pred : GlobalSU->Preds) { in constrainLocalCopy()
2185 if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit())) in constrainLocalCopy()
2189 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); in constrainLocalCopy()
2192 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU(" in constrainLocalCopy()
2193 << GlobalSU->NodeNum << ")\n"); in constrainLocalCopy()
2194 DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak)); in constrainLocalCopy()
2197 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU(" in constrainLocalCopy()
2198 << FirstLocalSU->NodeNum << ")\n"); in constrainLocalCopy()
2199 DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak)); in constrainLocalCopy()
2207 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals"); in apply()
2209 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); in apply()
2210 if (FirstPos == DAG->end()) in apply()
2212 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos); in apply()
2213 RegionEndIdx = DAG->getLIS()->getInstructionIndex( in apply()
2214 *priorNonDebug(DAG->end(), DAG->begin())); in apply()
2216 for (SUnit &SU : DAG->SUnits) { in apply()
2217 if (!SU.getInstr()->isCopy()) in apply()
2224 //===----------------------------------------------------------------------===//
2227 //===----------------------------------------------------------------------===//
2239 int ResCntFactor = (int)(Count - (Latency * LFactor)); in checkResourceLimit()
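The matched line is the core of checkResourceLimit(). A hedged reconstruction of the surrounding helper, inferred from this line and the call sites matched at lines 2618 and 2813 (treat the AfterSchedNode split as unverified against any particular revision):

    // A zone is resource limited when scaled resource usage exceeds
    // scaled latency by at least one latency factor.
    static bool checkResourceLimit(unsigned LFactor, unsigned Count,
                                   unsigned Latency, bool AfterSchedNode) {
      int ResCntFactor = (int)(Count - (Latency * LFactor));
      if (AfterSchedNode)
        return ResCntFactor >= (int)LFactor;
      return ResCntFactor > (int)LFactor;
    }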
2250 if (HazardRec && HazardRec->isEnabled()) { in reset()
2259 MinReadyCycle = std::numeric_limits<unsigned>::max(); in reset()
2276 // Reserve a zero-count for invalid CritResIdx. in reset()
2284 if (!SchedModel->hasInstrSchedModel()) in init()
2286 RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); in init()
2287 for (SUnit &SU : DAG->SUnits) { in init()
2288 const MCSchedClassDesc *SC = DAG->getSchedClass(&SU); in init()
2289 RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC) in init()
2290 * SchedModel->getMicroOpFactor(); in init()
2292 PI = SchedModel->getWriteProcResBegin(SC), in init()
2293 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { in init()
2294 unsigned PIdx = PI->ProcResourceIdx; in init()
2295 unsigned Factor = SchedModel->getResourceFactor(PIdx); in init()
2296 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle); in init()
2298 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle)); in init()
2309 if (SchedModel->hasInstrSchedModel()) { in init()
2310 unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); in init()
2318 NumUnits += SchedModel->getProcResource(i)->NumUnits; in init()
2320 auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin; in init()
2321 for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits; in init()
2333 /// resources and computed by checkHazard(). A fully in-order model
2335 /// available for scheduling until they are ready. However, a weaker in-order
2336 /// model may use this for heuristics. For example, if a processor has in-order
2339 if (!SU->isUnbuffered) in getLatencyStallCycles()
2342 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); in getLatencyStallCycles()
2344 return ReadyCycle - CurrCycle; in getLatencyStallCycles()
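The fragments of getLatencyStallCycles() consolidate into a small helper; assembled as a sketch:

    unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
      // Only unbuffered (in-order) resources can stall the pipeline here.
      if (!SU->isUnbuffered)
        return 0;
      unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
      // If the node is not ready yet, the difference is the stall length.
      if (ReadyCycle > CurrCycle)
        return ReadyCycle - CurrCycle;
      return 0;
    }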
2353 if (SchedModel && SchedModel->enableIntervals()) { in getNextResourceCycleByInstance()
2366 // For bottom-up scheduling add the cycles needed for the current operation. in getNextResourceCycleByInstance()
2368 NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle); in getNextResourceCycleByInstance()
2387 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits; in getNextResourceCycle()
2404 make_range(SchedModel->getWriteProcResBegin(SC), in getNextResourceCycle()
2405 SchedModel->getWriteProcResEnd(SC))) in getNextResourceCycle()
2411 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin; in getNextResourceCycle()
2429 LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @" in getNextResourceCycle()
2437 LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx) in getNextResourceCycle()
2438 << "[" << InstanceIdx - StartIndex << "]" in getNextResourceCycle()
2448 /// supports highly complicated in-order reservation tables
2449 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2453 /// for instruction dispatch limitations, including the number of micro-ops that
2458 if (HazardRec->isEnabled() in checkHazard()
2459 && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) { in checkHazard()
2463 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); in checkHazard()
2464 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { in checkHazard()
2465 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" in checkHazard()
2466 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); in checkHazard()
2471 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) || in checkHazard()
2472 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) { in checkHazard()
2473 LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must " in checkHazard()
2478 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { in checkHazard()
2479 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); in checkHazard()
2481 make_range(SchedModel->getWriteProcResBegin(SC), in checkHazard()
2482 SchedModel->getWriteProcResEnd(SC))) { in checkHazard()
2491 MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall); in checkHazard()
2493 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " in checkHazard()
2494 << SchedModel->getResourceName(ResIdx) in checkHazard()
2495 << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']' in checkHazard()
2518 << LateSU->NodeNum << ") " << RemLatency << "c\n"); in findMaxLatency()
2524 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2529 if (!SchedModel->hasInstrSchedModel()) in getOtherResourceCount()
2532 unsigned OtherCritCount = Rem->RemIssueCount in getOtherResourceCount()
2533 + (RetiredMOps * SchedModel->getMicroOpFactor()); in getOtherResourceCount()
2535 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); in getOtherResourceCount()
2536 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); in getOtherResourceCount()
2538 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx]; in getOtherResourceCount()
2547 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) in getOtherResourceCount()
2548 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n"); in getOtherResourceCount()
2555 assert(SU->getInstr() && "Scheduled SUnit must have instr"); in releaseNode()
2562 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall); in releaseNode()
2570 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; in releaseNode()
2588 if (SchedModel->getMicroOpBufferSize() == 0) { in bumpCycle()
2589 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && in bumpCycle()
2594 // Update the current micro-ops, which will issue in the next cycle. in bumpCycle()
2595 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); in bumpCycle()
2596 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; in bumpCycle()
2599 if ((NextCycle - CurrCycle) > DependentLatency) in bumpCycle()
2602 DependentLatency -= (NextCycle - CurrCycle); in bumpCycle()
2604 if (!HazardRec->isEnabled()) { in bumpCycle()
2611 HazardRec->AdvanceCycle(); in bumpCycle()
2613 HazardRec->RecedeCycle(); in bumpCycle()
2618 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), in bumpCycle()
2633 /// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2636 /// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2645 unsigned Factor = SchedModel->getResourceFactor(PIdx); in countResource()
2646   unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle); in countResource()
2647 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +" in countResource()
2652 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); in countResource()
2653 Rem->RemainingCounts[PIdx] -= Count; in countResource()
2660 << SchedModel->getResourceName(PIdx) << ": " in countResource()
2661 << getResourceCount(PIdx) / SchedModel->getLatencyFactor() in countResource()
2670 << SchedModel->getResourceName(PIdx) in countResource()
2671 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']' in countResource()
2680 if (HazardRec->isEnabled()) { in bumpNode()
2681 if (!isTop() && SU->isCall) { in bumpNode()
2682 // Calls are scheduled with their preceding instructions. For bottom-up in bumpNode()
2684 HazardRec->Reset(); in bumpNode()
2686 HazardRec->EmitInstruction(SU); in bumpNode()
2692 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); in bumpNode()
2693 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); in bumpNode()
2695 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && in bumpNode()
2698 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); in bumpNode()
2702 switch (SchedModel->getMicroOpBufferSize()) { in bumpNode()
2714 // scheduled MOps to be "retired". We do loosely model in-order resource in bumpNode()
2715 // latency. If this instruction uses an in-order resource, account for any in bumpNode()
2717 if (SU->isUnbuffered && ReadyCycle > NextCycle) in bumpNode()
2724 if (SchedModel->hasInstrSchedModel()) { in bumpNode()
2725 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); in bumpNode()
2726 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); in bumpNode()
2727 Rem->RemIssueCount -= DecRemIssue; in bumpNode()
2729 // Scale scheduled micro-ops for comparing with the critical resource. in bumpNode()
2731 RetiredMOps * SchedModel->getMicroOpFactor(); in bumpNode()
2733 // If scaled micro-ops are now more than the previous critical resource by in bumpNode()
2734 // a full cycle, then micro-ops issue becomes critical. in bumpNode()
2735 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) in bumpNode()
2736 >= (int)SchedModel->getLatencyFactor()) { in bumpNode()
2739 << ScaledMOps / SchedModel->getLatencyFactor() in bumpNode()
2744 PI = SchedModel->getWriteProcResBegin(SC), in bumpNode()
2745 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { in bumpNode()
2747 countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle, in bumpNode()
2748 PI->AcquireAtCycle); in bumpNode()
2752 if (SU->hasReservedResource) { in bumpNode()
2754 // For top-down scheduling, this is the cycle in which we schedule this in bumpNode()
2756     // resource. For bottom-up it is simply the instruction's cycle. in bumpNode()
2758 PI = SchedModel->getWriteProcResBegin(SC), in bumpNode()
2759 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { in bumpNode()
2760 unsigned PIdx = PI->ProcResourceIdx; in bumpNode()
2761 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { in bumpNode()
2763 if (SchedModel && SchedModel->enableIntervals()) { in bumpNode()
2766 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle); in bumpNode()
2770 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle), in bumpNode()
2775 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle), in bumpNode()
2782 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle); in bumpNode()
2785 std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle); in bumpNode()
2796 if (SU->getDepth() > TopLatency) { in bumpNode()
2797 TopLatency = SU->getDepth(); in bumpNode()
2799 << SU->NodeNum << ") " << TopLatency << "c\n"); in bumpNode()
2801 if (SU->getHeight() > BotLatency) { in bumpNode()
2802 BotLatency = SU->getHeight(); in bumpNode()
2804 << SU->NodeNum << ") " << BotLatency << "c\n"); in bumpNode()
2806 // If we stall for any reason, bump the cycle. in bumpNode()
2813 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), in bumpNode()
2818 // one cycle. Since we commonly reach the max MOps here, opportunistically in bumpNode()
2826 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) || in bumpNode()
2827 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) { in bumpNode()
2833 while (CurrMOps >= SchedModel->getIssueWidth()) { in bumpNode()
2834 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle " in bumpNode()
2846 MinReadyCycle = std::numeric_limits<unsigned>::max(); in releasePending()
2852 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; in releasePending()
2862 --I; in releasePending()
2863 --E; in releasePending()
2896 // FIXME: Re-enable assert once PR20057 is resolved. in pickOnlyChoice()
2897 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && in pickOnlyChoice()
2918 if (!SchedModel->hasInstrSchedModel()) in dumpReservedCycles()
2921 unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); in dumpReservedCycles()
2925 const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits; in dumpReservedCycles()
2926 std::string ResName = SchedModel->getResourceName(ResIdx); in dumpReservedCycles()
2929 if (SchedModel && SchedModel->enableIntervals()) { in dumpReservedCycles()
2947 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); in dumpScheduledState()
2950 ResFactor = SchedModel->getMicroOpFactor(); in dumpScheduledState()
2953 unsigned LFactor = SchedModel->getLatencyFactor(); in dumpScheduledState()
2959 << SchedModel->getResourceName(ZoneCritResIdx) in dumpScheduledState()
2961 << (IsResourceLimited ? " - Resource" : " - Latency") in dumpScheduledState()
2968 //===----------------------------------------------------------------------===//
2969 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2970 //===----------------------------------------------------------------------===//
2978 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); in initResourceDelta()
2980 PI = SchedModel->getWriteProcResBegin(SC), in initResourceDelta()
2981 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { in initResourceDelta()
2982 if (PI->ProcResourceIdx == Policy.ReduceResIdx) in initResourceDelta()
2983 ResDelta.CritResources += PI->ReleaseAtCycle; in initResourceDelta()
2984 if (PI->ProcResourceIdx == Policy.DemandResIdx) in initResourceDelta()
2985 ResDelta.DemandedResources += PI->ReleaseAtCycle; in initResourceDelta()
2990 /// overall schedule has become latency-limited and whether the instructions
2994 /// max height/depth of scheduled nodes minus the cycles since it was
2996 /// DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
2998 /// The "independent" latency is the max ready queue depth:
2999 /// ILat = max N.depth for N in Available|Pending
3007 RemLatency = std::max(RemLatency, in computeRemLatency()
3009 RemLatency = std::max(RemLatency, in computeRemLatency()
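The two truncated std::max lines above fold the Available and Pending queues into the estimate. A hedged reconstruction of the whole helper:

    unsigned GenericSchedulerBase::computeRemLatency(SchedBoundary &CurrZone) {
      // Start from the latency of deps already released into this zone...
      unsigned RemLatency = CurrZone.getDependentLatency();
      // ...then take the max over everything still sitting in the queues.
      RemLatency = std::max(
          RemLatency, CurrZone.findMaxLatency(CurrZone.Available.elements()));
      RemLatency = std::max(
          RemLatency, CurrZone.findMaxLatency(CurrZone.Pending.elements()));
      return RemLatency;
    }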
3047 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0; in setPolicy()
3052 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) { in setPolicy()
3055 OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(), in setPolicy()
3060 // acyclic latency during PostRA, and highly out-of-order processors will in setPolicy()
3077 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n"; in setPolicy()
3080 << SchedModel->getResourceName(OtherCritIdx) << "\n"; in setPolicy()
3093 GenericSchedulerBase::CandReason Reason) { in getReasonStr() argument
3094 switch (Reason) { in getReasonStr()
3097 case PhysReg: return "PHYS-REG "; in getReasonStr()
3098 case RegExcess: return "REG-EXCESS"; in getReasonStr()
3099 case RegCritical: return "REG-CRIT "; in getReasonStr()
3103 case RegMax: return "REG-MAX "; in getReasonStr()
3104 case ResourceReduce: return "RES-REDUCE"; in getReasonStr()
3105 case ResourceDemand: return "RES-DEMAND"; in getReasonStr()
3106 case TopDepthReduce: return "TOP-DEPTH "; in getReasonStr()
3107 case TopPathReduce: return "TOP-PATH "; in getReasonStr()
3108 case BotHeightReduce:return "BOT-HEIGHT"; in getReasonStr()
3109 case BotPathReduce: return "BOT-PATH "; in getReasonStr()
3110 case NextDefUse: return "DEF-USE "; in getReasonStr()
3113 llvm_unreachable("Unknown reason!"); in getReasonStr()
3120 switch (Cand.Reason) { in traceCandidate()
3139 Latency = Cand.SU->getDepth(); in traceCandidate()
3142 Latency = Cand.SU->getHeight(); in traceCandidate()
3145 Latency = Cand.SU->getHeight(); in traceCandidate()
3148 Latency = Cand.SU->getDepth(); in traceCandidate()
3151 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); in traceCandidate()
3153 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) in traceCandidate()
3158 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; in traceCandidate()
3176 GenericSchedulerBase::CandReason Reason) { in tryLess() argument
3178 TryCand.Reason = Reason; in tryLess()
3182 if (Cand.Reason > Reason) in tryLess()
3183 Cand.Reason = Reason; in tryLess()
3192 GenericSchedulerBase::CandReason Reason) { in tryGreater() argument
3194 TryCand.Reason = Reason; in tryGreater()
3198 if (Cand.Reason > Reason) in tryGreater()
3199 Cand.Reason = Reason; in tryGreater()
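tryLess and tryGreater are mirror images; assembling the matched fragments gives the following (tryGreater flips the two comparisons):

    static bool tryLess(int TryVal, int CandVal,
                        GenericSchedulerBase::SchedCandidate &TryCand,
                        GenericSchedulerBase::SchedCandidate &Cand,
                        GenericSchedulerBase::CandReason Reason) {
      if (TryVal < CandVal) {
        TryCand.Reason = Reason;  // TryCand wins on this criterion.
        return true;
      }
      if (TryVal > CandVal) {
        if (Cand.Reason > Reason) // Record the strongest reason Cand won by.
          Cand.Reason = Reason;
        return true;
      }
      return false;              // Tie: fall through to the next criterion.
    }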
3212 if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) > in tryLatency()
3214 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), in tryLatency()
3218 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), in tryLatency()
3225 if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) > in tryLatency()
3227 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), in tryLatency()
3231 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), in tryLatency()
3239 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) { in tracePick() argument
3241 << GenericSchedulerBase::getReasonStr(Reason) << '\n'); in tracePick()
3245 tracePick(Cand.Reason, Cand.AtTop); in tracePick()
3249 assert(dag->hasVRegLiveness() && in initialize()
3252 SchedModel = DAG->getSchedModel(); in initialize()
3253 TRI = DAG->TRI; in initialize()
3256 DAG->computeDFSResult(); in initialize()
3266 const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); in initialize()
3268 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); in initialize()
3271 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); in initialize()
3277 /// Initialize the per-region scheduling policy.
3281 const MachineFunction &MF = *Begin->getMF(); in initPolicy()
3289 for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) { in initPolicy()
3291 if (TLI->isTypeLegal(LegalIntVT)) { in initPolicy()
3292 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( in initPolicy()
3293 TLI->getRegClassFor(LegalIntVT)); in initPolicy()
3299 // For generic targets, we default to bottom-up, because it's simpler and more in initPolicy()
3300 // compile-time optimizations have been implemented in that direction. in initPolicy()
3312 // Check -misched-topdown/bottomup can force or unforce scheduling direction. in initPolicy()
3313 // e.g. -misched-bottomup=false allows scheduling in both directions. in initPolicy()
3315 "-misched-topdown incompatible with -misched-bottomup"); in initPolicy()
3341 /// We estimate an upper bound on in-flight instructions as:
3343 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3354 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), in checkAcyclicLatency()
3357 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); in checkAcyclicLatency()
3360       (AcyclicCount * Rem.RemIssueCount + IterCount - 1) / IterCount; in checkAcyclicLatency()
3362 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); in checkAcyclicLatency()
3368 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " in checkAcyclicLatency()
3369 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() in checkAcyclicLatency()
3370 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount in checkAcyclicLatency()
3371 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() in checkAcyclicLatency()
3372 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; in checkAcyclicLatency()
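To make the scaled arithmetic concrete, here is a toy calculation with made-up numbers (LatencyFactor and MicroOpFactor assumed to be 1; nothing here comes from a real subtarget):

    // Assume: CyclicCritPath = 2c, CriticalPath = 6c,
    //         RemIssueCount = 8 uops, MicroOpBufferSize = 4.
    unsigned IterCount     = std::max(2u * 1, 8u);     // = 8
    unsigned AcyclicCount  = 6 * 1;                    // = 6
    unsigned InFlightCount = (6 * 8 + 8 - 1) / 8;      // = 6 (rounded up)
    unsigned BufferLimit   = 4 * 1;                    // = 4
    // 6 in-flight uops > 4 buffered => acyclic-latency limited.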
3377 Rem.CriticalPath = DAG->ExitSU.getDepth(); in registerRoots()
3381 if (SU->getDepth() > Rem.CriticalPath) in registerRoots()
3382 Rem.CriticalPath = SU->getDepth(); in registerRoots()
3384   LLVM_DEBUG(dbgs() << "Critical Path(GS-RR): " << Rem.CriticalPath << '\n'); in registerRoots()
3386     errs() << "Critical Path(GS-RR): " << Rem.CriticalPath << "\n"; in registerRoots()
3389 if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) { in registerRoots()
3390 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); in registerRoots()
3400 GenericSchedulerBase::CandReason Reason, in tryPressure() argument
3406 Reason)) { in tryPressure()
3420 Reason); in tryPressure()
3423 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) : in tryPressure()
3424 std::numeric_limits<int>::max(); in tryPressure()
3426 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) : in tryPressure()
3427 std::numeric_limits<int>::max(); in tryPressure()
3432 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); in tryPressure()
3436 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; in getWeakLeft()
3447 const MachineInstr *MI = SU->getInstr(); in biasPhysReg()
3449 if (MI->isCopy()) { in biasPhysReg()
3454 if (MI->getOperand(ScheduledOper).getReg().isPhysical()) in biasPhysReg()
3458 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; in biasPhysReg()
3459 if (MI->getOperand(UnscheduledOper).getReg().isPhysical()) in biasPhysReg()
3460 return AtBoundary ? -1 : 1; in biasPhysReg()
3463 if (MI->isMoveImmediate()) { in biasPhysReg()
3468 for (const MachineOperand &Op : MI->defs()) { in biasPhysReg()
3476 return isTop ? -1 : 1; in biasPhysReg()
3489 if (DAG->isTrackingPressure()) { in initCandidate()
3492 Cand.SU->getInstr(), in initCandidate()
3494 DAG->getRegionCriticalPSets(), in initCandidate()
3495 DAG->getRegPressure().MaxSetPressure); in initCandidate()
3499 Cand.SU->getInstr(), in initCandidate()
3500 &DAG->getPressureDiff(Cand.SU), in initCandidate()
3502 DAG->getRegionCriticalPSets(), in initCandidate()
3503 DAG->getRegPressure().MaxSetPressure); in initCandidate()
3506 Cand.SU->getInstr(), in initCandidate()
3507 DAG->getPressureDiff(Cand.SU), in initCandidate()
3509 DAG->getRegionCriticalPSets(), in initCandidate()
3510 DAG->getRegPressure().MaxSetPressure); in initCandidate()
3515 << " Try SU(" << Cand.SU->NodeNum << ") " in initCandidate()
3516 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":" in initCandidate()
3530 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3536 TryCand.Reason = NodeOrder; in tryCandidate()
3543 return TryCand.Reason != NoCand; in tryCandidate()
3546 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, in tryCandidate()
3549 DAG->MF)) in tryCandidate()
3550 return TryCand.Reason != NoCand; in tryCandidate()
3552 // Avoid increasing the max critical pressure in the scheduled region. in tryCandidate()
3553 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, in tryCandidate()
3556 DAG->MF)) in tryCandidate()
3557 return TryCand.Reason != NoCand; in tryCandidate()
3563 // "tie-breaking" in nature. in tryCandidate()
3569 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && in tryCandidate()
3571 return TryCand.Reason != NoCand; in tryCandidate()
3574 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), in tryCandidate()
3575 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) in tryCandidate()
3576 return TryCand.Reason != NoCand; in tryCandidate()
3582 // This is a best effort to set things up for a post-RA pass. Optimizations in tryCandidate()
3586 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); in tryCandidate()
3588 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); in tryCandidate()
3592 return TryCand.Reason != NoCand; in tryCandidate()
3599 return TryCand.Reason != NoCand; in tryCandidate()
3602 // Avoid increasing the max pressure of the entire region. in tryCandidate()
3603 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, in tryCandidate()
3606 DAG->MF)) in tryCandidate()
3607 return TryCand.Reason != NoCand; in tryCandidate()
3614 return TryCand.Reason != NoCand; in tryCandidate()
3618 return TryCand.Reason != NoCand; in tryCandidate()
3624 return TryCand.Reason != NoCand; in tryCandidate()
3627 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) in tryCandidate()
3628 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { in tryCandidate()
3629 TryCand.Reason = NodeOrder; in tryCandidate()
3641 /// maintain the number of vreg uses remaining to be top-scheduled.
3680 // Set the bottom-up policy based on the state of the current bottom zone and in pickNodeBidirectional()
3684 // Set the top-down policy based on the state of the current top zone and in pickNodeBidirectional()
3691 if (!BotCand.isValid() || BotCand.SU->isScheduled || in pickNodeBidirectional()
3694 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); in pickNodeBidirectional()
3695 assert(BotCand.Reason != NoCand && "failed to find the first candidate"); in pickNodeBidirectional()
3702 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand); in pickNodeBidirectional()
3704 "Last pick result should correspond to re-picking right now"); in pickNodeBidirectional()
3711 if (!TopCand.isValid() || TopCand.SU->isScheduled || in pickNodeBidirectional()
3714 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); in pickNodeBidirectional()
3715 assert(TopCand.Reason != NoCand && "failed to find the first candidate"); in pickNodeBidirectional()
3722 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand); in pickNodeBidirectional()
3724 "Last pick result should correspond to re-picking right now"); in pickNodeBidirectional()
3733 TopCand.Reason = NoCand; in pickNodeBidirectional()
3746 if (DAG->top() == DAG->bottom()) { in pickNode()
3758 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); in pickNode()
3759 assert(TopCand.Reason != NoCand && "failed to find a candidate"); in pickNode()
3769 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); in pickNode()
3770 assert(BotCand.Reason != NoCand && "failed to find a candidate"); in pickNode()
3778 } while (SU->isScheduled); in pickNode()
3794 // removed so it does not get re-picked in a subsequent pickNode call. in pickNode()
3795 if (SU->isTopReady()) in pickNode()
3797 if (SU->isBottomReady()) in pickNode()
3800 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " in pickNode()
3801 << *SU->getInstr()); in pickNode()
3806 MachineBasicBlock::iterator InsertPos = SU->getInstr(); in reschedulePhysReg()
3809 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs; in reschedulePhysReg()
3818 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) in reschedulePhysReg()
3820 MachineInstr *Copy = DepSU->getInstr(); in reschedulePhysReg()
3821 if (!Copy->isCopy() && !Copy->isMoveImmediate()) in reschedulePhysReg()
3824 DAG->dumpNode(*Dep.getSUnit())); in reschedulePhysReg()
3825 DAG->moveInstruction(Copy, InsertPos); in reschedulePhysReg()
3838 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); in schedNode()
3840 if (SU->hasPhysRegUses) in schedNode()
3843 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); in schedNode()
3845 if (SU->hasPhysRegDefs) in schedNode()
3855 // Register DAG post-processors. in createGenericSchedLive()
3860 DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); in createGenericSchedLive()
3862 const TargetSubtargetInfo &STI = C->MF->getSubtarget(); in createGenericSchedLive()
3866 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions)); in createGenericSchedLive()
3878 //===----------------------------------------------------------------------===//
3879 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3880 //===----------------------------------------------------------------------===//
3884 SchedModel = DAG->getSchedModel(); in initialize()
3885 TRI = DAG->TRI; in initialize()
3893 const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); in initialize()
3895 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); in initialize()
3898 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); in initialize()
3918 Rem.CriticalPath = DAG->ExitSU.getDepth(); in registerRoots()
3922 if (SU->getDepth() > Rem.CriticalPath) in registerRoots()
3923 Rem.CriticalPath = SU->getDepth(); in registerRoots()
3925   LLVM_DEBUG(dbgs() << "Critical Path(PGS-RR): " << Rem.CriticalPath << '\n'); in registerRoots()
3927     errs() << "Critical Path(PGS-RR): " << Rem.CriticalPath << "\n"; in registerRoots()
3935 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3940 TryCand.Reason = NodeOrder; in tryCandidate()
3947 return TryCand.Reason != NoCand; in tryCandidate()
3950 if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), in tryCandidate()
3951 Cand.SU == DAG->getNextClusterSucc(), in tryCandidate()
3953 return TryCand.Reason != NoCand; in tryCandidate()
3958 return TryCand.Reason != NoCand; in tryCandidate()
3962 return TryCand.Reason != NoCand; in tryCandidate()
3966 return TryCand.Reason != NoCand; in tryCandidate()
3970 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) { in tryCandidate()
3971 TryCand.Reason = NodeOrder; in tryCandidate()
4010 // Set the bottom-up policy based on the state of the current bottom zone and in pickNodeBidirectional()
4014 // Set the top-down policy based on the state of the current top zone and in pickNodeBidirectional()
4021 if (!BotCand.isValid() || BotCand.SU->isScheduled || in pickNodeBidirectional()
4025 assert(BotCand.Reason != NoCand && "failed to find the first candidate"); in pickNodeBidirectional()
4034 "Last pick result should correspond to re-picking right now"); in pickNodeBidirectional()
4041 if (!TopCand.isValid() || TopCand.SU->isScheduled || in pickNodeBidirectional()
4045 assert(TopCand.Reason != NoCand && "failed to find the first candidate"); in pickNodeBidirectional()
4054 "Last pick result should correspond to re-picking right now"); in pickNodeBidirectional()
4063 TopCand.Reason = NoCand; in pickNodeBidirectional()
4076 if (DAG->top() == DAG->bottom()) { in pickNode()
4090 // Set the bottom-up policy based on the state of the current bottom in pickNode()
4094 assert(BotCand.Reason != NoCand && "failed to find a candidate"); in pickNode()
4106 // Set the top-down policy based on the state of the current top zone in pickNode()
4110 assert(TopCand.Reason != NoCand && "failed to find a candidate"); in pickNode()
4118 } while (SU->isScheduled); in pickNode()
4120 if (SU->isTopReady()) in pickNode()
4122 if (SU->isBottomReady()) in pickNode()
4125 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " in pickNode()
4126 << *SU->getInstr()); in pickNode()
4134 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); in schedNode()
4137 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); in schedNode()
4146 const TargetSubtargetInfo &STI = C->MF->getSubtarget(); in createGenericSchedPostRA()
4150 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions)); in createGenericSchedPostRA()
4154 //===----------------------------------------------------------------------===//
4156 //===----------------------------------------------------------------------===//
4168 /// Apply a less-than relation on node priority.
4172 unsigned SchedTreeA = DFSResult->getSubtreeID(A); in operator ()()
4173 unsigned SchedTreeB = DFSResult->getSubtreeID(B); in operator ()()
4176 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB)) in operator ()()
4177 return ScheduledTrees->test(SchedTreeB); in operator ()()
4180 if (DFSResult->getSubtreeLevel(SchedTreeA) in operator ()()
4181 != DFSResult->getSubtreeLevel(SchedTreeB)) { in operator ()()
4182 return DFSResult->getSubtreeLevel(SchedTreeA) in operator ()()
4183 < DFSResult->getSubtreeLevel(SchedTreeB); in operator ()()
4187 return DFSResult->getILP(A) < DFSResult->getILP(B); in operator ()()
4189 return DFSResult->getILP(A) > DFSResult->getILP(B); in operator ()()
4204 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness"); in initialize()
4206 DAG->computeDFSResult(); in initialize()
4207 Cmp.DFSResult = DAG->getDFSResult(); in initialize()
4208 Cmp.ScheduledTrees = &DAG->getScheduledTrees(); in initialize()
4218 /// -----------------------------------------
4228 << "SU(" << SU->NodeNum << ") " in pickNode()
4229 << " ILP: " << DAG->getDFSResult()->getILP(SU) in pickNode()
4230 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) in pickNode()
4232 << DAG->getDFSResult()->getSubtreeLevel( in pickNode()
4233 DAG->getDFSResult()->getSubtreeID(SU)) in pickNode()
4235 << "Scheduling " << *SU->getInstr()); in pickNode()
4247 assert(!IsTopNode && "SchedDFSResult needs bottom-up"); in schedNode()
4268 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4270 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4272 //===----------------------------------------------------------------------===//
4274 //===----------------------------------------------------------------------===//
4279 /// Apply a less-than relation on the node order, which corresponds to the
4280 /// instruction order prior to scheduling. IsReverse implements greater-than.
4285 return A->NodeNum > B->NodeNum; in operator ()()
4287 return A->NodeNum < B->NodeNum; in operator ()()
4296 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4302 // When scheduling bottom-up, use greater-than as the queue priority.
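Those two comments describe the shuffler's paired queues. A sketch of how such a pair could be declared (using std::priority_queue for illustration rather than whatever container the file actually uses):

    // Source order on top: the less-than comparator yields source order.
    std::priority_queue<SUnit *, std::vector<SUnit *>, SUnitOrder<false>> TopQ;
    // Bottom-up: the greater-than comparator reverses it.
    std::priority_queue<SUnit *, std::vector<SUnit *>, SUnitOrder<true>> BottomQ;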
4316 /// -----------------------------------------
4325 } while (SU->isScheduled); in pickNode()
4332 } while (SU->isScheduled); in pickNode()
4356 "-misched-topdown incompatible with -misched-bottomup"); in createInstructionShuffler()
4366 //===----------------------------------------------------------------------===//
4368 //===----------------------------------------------------------------------===//
4381 return std::string(G->MF.getName()); in getGraphName()
4391 return (Node->Preds.size() > ViewMISchedCutoff in isNodeHidden()
4392 || Node->Succs.size() > ViewMISchedCutoff); in isNodeHidden()
4411 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? in getNodeLabel()
4412 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; in getNodeLabel()
4413 SS << "SU:" << SU->NodeNum; in getNodeLabel()
4415 SS << " I:" << DFS->getNumInstrs(SU); in getNodeLabel()
4420 return G->getGraphNodeLabel(SU); in getNodeDescription()
4426 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? in getNodeAttributes()
4427 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; in getNodeAttributes()
4430 Str += DOT::getColorString(DFS->getSubtreeID(N)); in getNodeAttributes()
4440 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4451 /// Out-of-line implementation with no arguments is handy for gdb.
4453 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); in viewGraph()
4471 "Cannot execute on an un-sorted set of intervals."); in getFirstAvailableAt()
4489 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first; in getFirstAvailableAt()
4498 assert(CutOff > 0 && "0-size interval history has no use."); in add()
4508 [&A](const ResourceSegments::IntervalTy &Interval) -> bool { in add()
4560 if (std::prev(next)->second >= next->first) { in sortAndMerge()
4561 next->first = std::prev(next)->first; in sortAndMerge()
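The two matched lines above are the merge step of sortAndMerge(). A self-contained sketch of the sort-then-merge idea over [start, end) intervals (a generic illustration, not the exact member function; the real code extends `next` backward over its predecessor, whereas this version folds each interval forward into the one before it):

    #include <algorithm>
    #include <list>
    #include <utility>

    using IntervalTy = std::pair<long, long>; // [start, end)

    void sortAndMerge(std::list<IntervalTy> &Intervals) {
      if (Intervals.size() <= 1)
        return;
      // Order by start cycle so any overlap is with the immediate predecessor.
      Intervals.sort([](const IntervalTy &A, const IntervalTy &B) {
        return A.first < B.first;
      });
      auto Next = std::next(Intervals.begin());
      while (Next != Intervals.end()) {
        auto Prev = std::prev(Next);
        if (Prev->second >= Next->first) {
          // Overlap or contact: extend Prev and drop Next.
          Prev->second = std::max(Prev->second, Next->second);
          Next = Intervals.erase(Next);
        } else {
          ++Next;
        }
      }
    }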