Lines Matching +full:low +full:- +full:latency
1 //===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
22 #define DEBUG_TYPE "machine-scheduler"
49 // . Hiding low latencies (SGPR constant loading, etc)
50 // . Keeping register usage low for better latency hiding and general
58 // simpler sub-problems. It divides the instructions into blocks, schedules
59 // locally inside the blocks where it takes care of low latencies, and then
62 // usage low.
79 // low latency instructions
82 // and keep register usage low.
84 // After the third step, a pass is done to improve the hiding of low
87 // Actually when talking about 'low latency' or 'high latency' it includes
88 // both the latency to get the cache (or global mem) data go to the register,
93 // The OpenCL doc says for example latency of 400 cycles for a global mem
97 // Both texture and constant L1 caches are 4-way associative with 64 bytes
101 // addresses per cycle, thus we could expect texture sampling latency to be
107 // Data exports should get similar latency.
116 // first loads get extra latency. The doc says global memory access can be
117 // 300-600 cycles. We do not specially take that into account when scheduling
128 case Latency: return "LATENCY"; in getReasonStr()
177 NodeNum2Index[SU->NodeNum] = SUnits.size(); in addUnit()
184 dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); in traceCandidate()
202 // Schedule low latency instructions as top as possible. in tryCandidateTopDown()
204 // . Low latency instructions which do not depend on other low latency in tryCandidateTopDown()
206 // . Other instructions which do not depend on low latency instructions in tryCandidateTopDown()
208 // . Low latencies in tryCandidateTopDown()
210 // Goal is to get: low latency instructions - independent instructions in tryCandidateTopDown()
211 // - (eventually some more low latency instructions) in tryCandidateTopDown()
212 // - instructions that depend on the first low latency instructions. in tryCandidateTopDown()
236 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) { in tryCandidateTopDown()
250 TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure); in pickNode()
253 TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum]; in pickNode()
254 TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum]; in pickNode()
256 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]]; in pickNode()
273 if (!SU->NumPredsLeft) in fastSchedule()
292 UI = MRI->def_instr_begin(Reg), in isDefBetween()
293 UE = MRI->def_instr_end(); UI != UE; ++UI) { in isDefBetween()
295 if (MI->isDebugValue()) in isDefBetween()
297 SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot(); in isDefBetween()
308 LiveIntervals *LIS = DAG->getLIS(); in initRegPressure()
309 MachineRegisterInfo *MRI = DAG->getMRI(); in initRegPressure()
310 DAG->initRPTracker(TopRPTracker); in initRegPressure()
311 DAG->initRPTracker(BotRPTracker); in initRegPressure()
312 DAG->initRPTracker(RPTracker); in initRegPressure()
317 RPTracker.setPos(SU->getInstr()); in initRegPressure()
359 isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(), in initRegPressure()
360 LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI, in initRegPressure()
391 if (!SU->NumPredsLeft) in schedule()
398 TopRPTracker.setPos(SU->getInstr()); in schedule()
411 assert(SU->isScheduled && in schedule()
412 SU->NumPredsLeft == 0); in schedule()
421 SU->isScheduled = false; in undoSchedule()
422 for (SDep& Succ : SU->Succs) { in undoSchedule()
423 if (BC->isSUInBlock(Succ.getSUnit(), ID)) in undoSchedule()
433 SUnit *SuccSU = SuccEdge->getSUnit(); in undoReleaseSucc()
435 if (SuccEdge->isWeak()) { in undoReleaseSucc()
436 ++SuccSU->WeakPredsLeft; in undoReleaseSucc()
439 ++SuccSU->NumPredsLeft; in undoReleaseSucc()
443 SUnit *SuccSU = SuccEdge->getSUnit(); in releaseSucc()
445 if (SuccEdge->isWeak()) { in releaseSucc()
446 --SuccSU->WeakPredsLeft; in releaseSucc()
450 if (SuccSU->NumPredsLeft == 0) { in releaseSucc()
452 DAG->dumpNode(*SuccSU); in releaseSucc()
458 --SuccSU->NumPredsLeft; in releaseSucc()
463 for (SDep& Succ : SU->Succs) { in releaseSuccessors()
466 if (SuccSU->NodeNum >= DAG->SUnits.size()) in releaseSuccessors()
469 if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock) in releaseSuccessors()
473 if (SuccSU->NumPredsLeft == 0 && InOrOutBlock) in releaseSuccessors()
480 assert (!SU->NumPredsLeft); in nodeScheduled()
491 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]]) in nodeScheduled()
494 if (DAG->IsLowLatencySU[SU->NodeNum]) { in nodeScheduled()
495 for (SDep& Succ : SU->Succs) { in nodeScheduled()
497 NodeNum2Index.find(Succ.getSUnit()->NodeNum); in nodeScheduled()
499 HasLowLatencyNonWaitedParent[I->second] = 1; in nodeScheduled()
502 SU->isScheduled = true; in nodeScheduled()
509 if (DAG->IsHighLatencySU[SU->NodeNum]) in finalizeUnits()
517 unsigned PredID = Pred->getID(); in addPred()
521 if (PredID == P->getID()) in addPred()
529 return PredID == S.first->getID(); in addPred()
536 unsigned SuccID = Succ->getID(); in addSucc()
540 if (SuccID == S.first->getID()) { in addSucc()
547 if (Succ->isHighLatencyBlock()) in addSucc()
552 [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && in addSucc()
562 dbgs() << "\nContains High Latency Instruction: " in printDebug()
566 P->printDebug(false); in printDebug()
573 S.first->printDebug(false); in printDebug()
585 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; in printDebug()
589 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; in printDebug()
594 DAG->dumpNode(*SU); in printDebug()
621 return B->second; in getBlocks()
625 if (SU->NodeNum >= DAG->SUnits.size()) in isSUInBlock()
627 return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID; in isSUInBlock()
631 unsigned DAGSize = DAG->SUnits.size(); in colorHighLatenciesAlone()
634 SUnit *SU = &DAG->SUnits[i]; in colorHighLatenciesAlone()
635 if (DAG->IsHighLatencySU[SU->NodeNum]) { in colorHighLatenciesAlone()
636 CurrentColoring[SU->NodeNum] = NextReservedID++; in colorHighLatenciesAlone()
652 unsigned DAGSize = DAG->SUnits.size(); in colorHighLatenciesGroups()
660 SUnit *SU = &DAG->SUnits[i]; in colorHighLatenciesGroups()
661 if (DAG->IsHighLatencySU[SU->NodeNum]) in colorHighLatenciesGroups()
675 for (unsigned SUNum : DAG->TopDownIndex2SU) { in colorHighLatenciesGroups()
676 const SUnit &SU = DAG->SUnits[SUNum]; in colorHighLatenciesGroups()
677 if (DAG->IsHighLatencySU[SU.NodeNum]) { in colorHighLatenciesGroups()
683 // two high latency instructions that depend in colorHighLatenciesGroups()
687 // enough because there the high latency order is in colorHighLatenciesGroups()
690 // high latency instructions and deduce if it is in colorHighLatenciesGroups()
696 // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessary in colorHighLatenciesGroups()
699 SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j], in colorHighLatenciesGroups()
703 SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU, in colorHighLatenciesGroups()
715 // there is another high latency instruction, in colorHighLatenciesGroups()
718 if (DAG->IsHighLatencySU[k] || (CurrentColoring[k] != ProposedColor && in colorHighLatenciesGroups()
725 if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) { in colorHighLatenciesGroups()
733 if (hasDataDependencyPred(SU, DAG->SUnits[j])) { in colorHighLatenciesGroups()
741 // high latency instruction we add. in colorHighLatenciesGroups()
753 // -> start a new one. in colorHighLatenciesGroups()
772 unsigned DAGSize = DAG->SUnits.size(); in colorComputeReservedDependencies()
784 for (unsigned SUNum : DAG->TopDownIndex2SU) { in colorComputeReservedDependencies()
785 SUnit *SU = &DAG->SUnits[SUNum]; in colorComputeReservedDependencies()
789 if (CurrentColoring[SU->NodeNum]) { in colorComputeReservedDependencies()
790 CurrentTopDownReservedDependencyColoring[SU->NodeNum] = in colorComputeReservedDependencies()
791 CurrentColoring[SU->NodeNum]; in colorComputeReservedDependencies()
795 for (SDep& PredDep : SU->Preds) { in colorComputeReservedDependencies()
797 if (PredDep.isWeak() || Pred->NodeNum >= DAGSize) in colorComputeReservedDependencies()
799 if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0) in colorComputeReservedDependencies()
800 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]); in colorComputeReservedDependencies()
807 CurrentTopDownReservedDependencyColoring[SU->NodeNum] = in colorComputeReservedDependencies()
813 CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second; in colorComputeReservedDependencies()
815 CurrentTopDownReservedDependencyColoring[SU->NodeNum] = in colorComputeReservedDependencies()
826 for (unsigned SUNum : DAG->BottomUpIndex2SU) { in colorComputeReservedDependencies()
827 SUnit *SU = &DAG->SUnits[SUNum]; in colorComputeReservedDependencies()
831 if (CurrentColoring[SU->NodeNum]) { in colorComputeReservedDependencies()
832 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = in colorComputeReservedDependencies()
833 CurrentColoring[SU->NodeNum]; in colorComputeReservedDependencies()
837 for (SDep& SuccDep : SU->Succs) { in colorComputeReservedDependencies()
839 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) in colorComputeReservedDependencies()
841 if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0) in colorComputeReservedDependencies()
842 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]); in colorComputeReservedDependencies()
849 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = in colorComputeReservedDependencies()
855 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second; in colorComputeReservedDependencies()
857 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = in colorComputeReservedDependencies()
871 for (const SUnit &SU : DAG->SUnits) { in colorAccordingToReservedDependencies()
874 // High latency instructions: already given. in colorAccordingToReservedDependencies()
884 CurrentColoring[SU.NodeNum] = Pos->second; in colorAccordingToReservedDependencies()
893 unsigned DAGSize = DAG->SUnits.size(); in colorEndsAccordingToDependencies()
905 for (unsigned SUNum : DAG->BottomUpIndex2SU) { in colorEndsAccordingToDependencies()
906 SUnit *SU = &DAG->SUnits[SUNum]; in colorEndsAccordingToDependencies()
910 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) in colorEndsAccordingToDependencies()
913 if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 || in colorEndsAccordingToDependencies()
914 CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0) in colorEndsAccordingToDependencies()
917 for (SDep& SuccDep : SU->Succs) { in colorEndsAccordingToDependencies()
919 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) in colorEndsAccordingToDependencies()
921 if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 || in colorEndsAccordingToDependencies()
922 CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0) in colorEndsAccordingToDependencies()
923 SUColors.insert(CurrentColoring[Succ->NodeNum]); in colorEndsAccordingToDependencies()
924 SUColorsPending.insert(PendingColoring[Succ->NodeNum]); in colorEndsAccordingToDependencies()
930 PendingColoring[SU->NodeNum] = *SUColors.begin(); in colorEndsAccordingToDependencies()
933 PendingColoring[SU->NodeNum] = NextNonReservedID++; in colorEndsAccordingToDependencies()
940 unsigned DAGSize = DAG->SUnits.size(); in colorForceConsecutiveOrderInGroup()
950 SUnit *SU = &DAG->SUnits[i]; in colorForceConsecutiveOrderInGroup()
953 assert(i == SU->NodeNum); in colorForceConsecutiveOrderInGroup()
959 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) in colorForceConsecutiveOrderInGroup()
968 CurrentColoring[i] = CurrentColoring[i-1]; in colorForceConsecutiveOrderInGroup()
973 unsigned DAGSize = DAG->SUnits.size(); in colorMergeConstantLoadsNextGroup()
975 for (unsigned SUNum : DAG->BottomUpIndex2SU) { in colorMergeConstantLoadsNextGroup()
976 SUnit *SU = &DAG->SUnits[SUNum]; in colorMergeConstantLoadsNextGroup()
979 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) in colorMergeConstantLoadsNextGroup()
983 // Low latency instructions usually have a predecessor (the address) in colorMergeConstantLoadsNextGroup()
984 if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum]) in colorMergeConstantLoadsNextGroup()
987 for (SDep& SuccDep : SU->Succs) { in colorMergeConstantLoadsNextGroup()
989 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) in colorMergeConstantLoadsNextGroup()
991 SUColors.insert(CurrentColoring[Succ->NodeNum]); in colorMergeConstantLoadsNextGroup()
994 CurrentColoring[SU->NodeNum] = *SUColors.begin(); in colorMergeConstantLoadsNextGroup()
999 unsigned DAGSize = DAG->SUnits.size(); in colorMergeIfPossibleNextGroup()
1001 for (unsigned SUNum : DAG->BottomUpIndex2SU) { in colorMergeIfPossibleNextGroup()
1002 SUnit *SU = &DAG->SUnits[SUNum]; in colorMergeIfPossibleNextGroup()
1005 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) in colorMergeIfPossibleNextGroup()
1008 for (SDep& SuccDep : SU->Succs) { in colorMergeIfPossibleNextGroup()
1010 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) in colorMergeIfPossibleNextGroup()
1012 SUColors.insert(CurrentColoring[Succ->NodeNum]); in colorMergeIfPossibleNextGroup()
1015 CurrentColoring[SU->NodeNum] = *SUColors.begin(); in colorMergeIfPossibleNextGroup()
1020 unsigned DAGSize = DAG->SUnits.size(); in colorMergeIfPossibleNextGroupOnlyForReserved()
1022 for (unsigned SUNum : DAG->BottomUpIndex2SU) { in colorMergeIfPossibleNextGroupOnlyForReserved()
1023 SUnit *SU = &DAG->SUnits[SUNum]; in colorMergeIfPossibleNextGroupOnlyForReserved()
1026 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) in colorMergeIfPossibleNextGroupOnlyForReserved()
1029 for (SDep& SuccDep : SU->Succs) { in colorMergeIfPossibleNextGroupOnlyForReserved()
1031 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) in colorMergeIfPossibleNextGroupOnlyForReserved()
1033 SUColors.insert(CurrentColoring[Succ->NodeNum]); in colorMergeIfPossibleNextGroupOnlyForReserved()
1036 CurrentColoring[SU->NodeNum] = *SUColors.begin(); in colorMergeIfPossibleNextGroupOnlyForReserved()
1041 unsigned DAGSize = DAG->SUnits.size(); in colorMergeIfPossibleSmallGroupsToNextGroup()
1044 for (unsigned SUNum : DAG->BottomUpIndex2SU) { in colorMergeIfPossibleSmallGroupsToNextGroup()
1045 SUnit *SU = &DAG->SUnits[SUNum]; in colorMergeIfPossibleSmallGroupsToNextGroup()
1046 unsigned color = CurrentColoring[SU->NodeNum]; in colorMergeIfPossibleSmallGroupsToNextGroup()
1050 for (unsigned SUNum : DAG->BottomUpIndex2SU) { in colorMergeIfPossibleSmallGroupsToNextGroup()
1051 SUnit *SU = &DAG->SUnits[SUNum]; in colorMergeIfPossibleSmallGroupsToNextGroup()
1052 unsigned color = CurrentColoring[SU->NodeNum]; in colorMergeIfPossibleSmallGroupsToNextGroup()
1055 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) in colorMergeIfPossibleSmallGroupsToNextGroup()
1061 for (SDep& SuccDep : SU->Succs) { in colorMergeIfPossibleSmallGroupsToNextGroup()
1063 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) in colorMergeIfPossibleSmallGroupsToNextGroup()
1065 SUColors.insert(CurrentColoring[Succ->NodeNum]); in colorMergeIfPossibleSmallGroupsToNextGroup()
1068 --ColorCount[color]; in colorMergeIfPossibleSmallGroupsToNextGroup()
1069 CurrentColoring[SU->NodeNum] = *SUColors.begin(); in colorMergeIfPossibleSmallGroupsToNextGroup()
1080 unsigned DAGSize = DAG->SUnits.size(); in regroupNoUserInstructions()
1083 for (unsigned SUNum : DAG->BottomUpIndex2SU) { in regroupNoUserInstructions()
1084 SUnit *SU = &DAG->SUnits[SUNum]; in regroupNoUserInstructions()
1087 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) in regroupNoUserInstructions()
1090 for (SDep& SuccDep : SU->Succs) { in regroupNoUserInstructions()
1092 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) in regroupNoUserInstructions()
1097 CurrentColoring[SU->NodeNum] = GroupID; in regroupNoUserInstructions()
1115 for (unsigned SUNum : DAG->TopDownIndex2SU) { in colorExports()
1116 const SUnit &SU = DAG->SUnits[SUNum]; in colorExports()
1119 // dependencies is a non-export, in which case we skip export grouping. in colorExports()
1122 if (SuccDep.isWeak() || SuccSU->NodeNum >= DAG->SUnits.size()) { in colorExports()
1126 assert(SuccSU->isInstr() && in colorExports()
1129 if (!SIInstrInfo::isEXP(*SuccSU->getInstr())) { in colorExports()
1130 // A non-export depends on us. Skip export grouping. in colorExports()
1132 // exports that are not depended on by non-exports, directly or in colorExports()
1148 unsigned DAGSize = DAG->SUnits.size(); in createBlocksForVariant()
1157 DAG->restoreSULinksLeft(); in createBlocksForVariant()
1179 Node2CurrentBlock.resize(DAGSize, -1); in createBlocksForVariant()
1181 SUnit *SU = &DAG->SUnits[i]; in createBlocksForVariant()
1182 unsigned Color = CurrentColoring[SU->NodeNum]; in createBlocksForVariant()
1186 CurrentBlocks.push_back(BlockPtrs.rbegin()->get()); in createBlocksForVariant()
1189 CurrentBlocks[RealID[Color]]->addUnit(SU); in createBlocksForVariant()
1190 Node2CurrentBlock[SU->NodeNum] = RealID[Color]; in createBlocksForVariant()
1195 SUnit *SU = &DAG->SUnits[i]; in createBlocksForVariant()
1197 for (SDep& SuccDep : SU->Succs) { in createBlocksForVariant()
1199 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) in createBlocksForVariant()
1201 if (Node2CurrentBlock[Succ->NodeNum] != SUID) in createBlocksForVariant()
1202 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]], in createBlocksForVariant()
1205 for (SDep& PredDep : SU->Preds) { in createBlocksForVariant()
1207 if (PredDep.isWeak() || Pred->NodeNum >= DAGSize) in createBlocksForVariant()
1209 if (Node2CurrentBlock[Pred->NodeNum] != SUID) in createBlocksForVariant()
1210 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]); in createBlocksForVariant()
1216 Block->finalizeUnits(); in createBlocksForVariant()
1220 Block->printDebug(true); in createBlocksForVariant()
1226 /// Non-const version.
1231 if (!I->isDebugInstr()) in nextIfDebug()
1250 unsigned Degree = Block->getSuccs().size(); in topologicalSort()
1262 TopDownBlock2Index[i] = --Id; in topologicalSort()
1264 for (SIScheduleBlock* Pred : Block->getPreds()) { in topologicalSort()
1265 if (!--TopDownBlock2Index[Pred->getID()]) in topologicalSort()
1266 WorkList.push_back(Pred->getID()); in topologicalSort()
1274 for (SIScheduleBlock* Pred : Block->getPreds()) { in topologicalSort()
1275 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && in topologicalSort()
1295 Block->fastSchedule(); in scheduleInsideBlocks()
1302 MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop(); in scheduleInsideBlocks()
1305 PosOld.reserve(DAG->SUnits.size()); in scheduleInsideBlocks()
1306 PosNew.reserve(DAG->SUnits.size()); in scheduleInsideBlocks()
1311 std::vector<SUnit*> SUs = Block->getScheduledUnits(); in scheduleInsideBlocks()
1314 MachineInstr *MI = SU->getInstr(); in scheduleInsideBlocks()
1320 DAG->getCurrentBottom()); in scheduleInsideBlocks()
1323 DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI); in scheduleInsideBlocks()
1330 DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true); in scheduleInsideBlocks()
1342 std::vector<SUnit*> SUs = Block->getScheduledUnits(); in scheduleInsideBlocks()
1343 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr()); in scheduleInsideBlocks()
1347 // Restore old ordering (which prevents a LIS->handleMove bug). in scheduleInsideBlocks()
1348 for (unsigned i = PosOld.size(), e = 0; i != e; --i) { in scheduleInsideBlocks()
1349 MachineBasicBlock::iterator POld = PosOld[i-1]; in scheduleInsideBlocks()
1350 MachineBasicBlock::iterator PNew = PosNew[i-1]; in scheduleInsideBlocks()
1353 DAG->getBB()->splice(POld, DAG->getBB(), PNew); in scheduleInsideBlocks()
1356 DAG->getLIS()->handleMove(*POld, /*UpdateFlags=*/true); in scheduleInsideBlocks()
1362 Block->printDebug(true); in scheduleInsideBlocks()
1372 if (Block->getPreds().empty()) in fillStats()
1373 Block->Depth = 0; in fillStats()
1376 for (SIScheduleBlock *Pred : Block->getPreds()) { in fillStats()
1377 if (Depth < Pred->Depth + Pred->getCost()) in fillStats()
1378 Depth = Pred->Depth + Pred->getCost(); in fillStats()
1380 Block->Depth = Depth; in fillStats()
1387 if (Block->getSuccs().empty()) in fillStats()
1388 Block->Height = 0; in fillStats()
1391 for (const auto &Succ : Block->getSuccs()) in fillStats()
1392 Height = std::max(Height, Succ.first->Height + Succ.first->getCost()); in fillStats()
1393 Block->Height = Height; in fillStats()
1419 for (unsigned Reg : Block->getInRegs()) { in SIScheduleBlockScheduler()
1421 int topoInd = -1; in SIScheduleBlockScheduler()
1422 for (SIScheduleBlock* Pred: Block->getPreds()) { in SIScheduleBlockScheduler()
1423 std::set<unsigned> PredOutRegs = Pred->getOutRegs(); in SIScheduleBlockScheduler()
1428 if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) { in SIScheduleBlockScheduler()
1429 topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()]; in SIScheduleBlockScheduler()
1448 BlockNumPredsLeft[i] = Block->getPreds().size(); in SIScheduleBlockScheduler()
1449 BlockNumSuccsLeft[i] = Block->getSuccs().size(); in SIScheduleBlockScheduler()
1455 assert(Block->getID() == i); in SIScheduleBlockScheduler()
1459 std::set<unsigned> InRegs = DAG->getInRegs(); in SIScheduleBlockScheduler()
1465 for (unsigned Reg : DAG->getOutRegs()) { in SIScheduleBlockScheduler()
1468 int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i]; in SIScheduleBlockScheduler()
1470 const std::set<unsigned> &OutRegs = Block->getOutRegs(); in SIScheduleBlockScheduler()
1483 for (unsigned Reg : Block->getInRegs()) { in SIScheduleBlockScheduler()
1485 for (SIScheduleBlock* Pred: Block->getPreds()) { in SIScheduleBlockScheduler()
1486 std::set<unsigned> PredOutRegs = Pred->getOutRegs(); in SIScheduleBlockScheduler()
1514 dbgs() << ' ' << Block->getID(); in SIScheduleBlockScheduler()
1527 Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency)) in tryCandidateLatency()
1531 TryCand, Cand, Latency)) in tryCandidateLatency()
1572 DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(), in pickBlock()
1581 << Block->getID() << ' '; in pickBlock()
1585 << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; in pickBlock()
1595 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock(); in pickBlock()
1597 checkRegUsageImpact(TryCand.Block->getInRegs(), in pickBlock()
1598 TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32]; in pickBlock()
1599 TryCand.NumSuccessors = TryCand.Block->getSuccs().size(); in pickBlock()
1601 TryCand.Block->getNumHighLatencySuccessors(); in pickBlock()
1604 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] - in pickBlock()
1606 TryCand.Height = TryCand.Block->Height; in pickBlock()
1620 LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' ' in pickBlock()
1625 LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n'; in pickBlock()
1626 dbgs() << "Is a block with high latency instruction: " in pickBlock()
1628 dbgs() << "Position of last high latency dependency: " in pickBlock()
1658 --LiveRegsConsumers[Reg]; in decreaseLiveRegs()
1665 for (const auto &Block : Parent->getSuccs()) { in releaseBlockSuccs()
1666 if (--BlockNumPredsLeft[Block.first->getID()] == 0) in releaseBlockSuccs()
1669 if (Parent->isHighLatencyBlock() && in releaseBlockSuccs()
1671 LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled; in releaseBlockSuccs()
1676 decreaseLiveRegs(Block, Block->getInRegs()); in blockScheduled()
1677 addLiveRegs(Block->getOutRegs()); in blockScheduled()
1679 for (const auto &RegP : LiveOutRegsNumUsages[Block->getID()]) { in blockScheduled()
1685 if (LastPosHighLatencyParentScheduled[Block->getID()] > in blockScheduled()
1688 LastPosHighLatencyParentScheduled[Block->getID()]; in blockScheduled()
1696 DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0); in checkRegUsageImpact()
1704 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg); in checkRegUsageImpact()
1706 DiffSetPressure[*PSetI] -= PSetI.getWeight(); in checkRegUsageImpact()
1714 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg); in checkRegUsageImpact()
1736 std::vector<SUnit*> SUs = Block->getScheduledUnits(); in scheduleVariant()
1739 Res.SUs.push_back(SU->NodeNum); in scheduleVariant()
1767 // Move low latencies further from their user without
1775 int LastLowLatencyUser = -1; in moveLowLatencies()
1776 int LastLowLatencyPos = -1; in moveLowLatencies()
1783 for (SDep& PredDep : SU->Preds) { in moveLowLatencies()
1785 if (SITII->isLowLatencyInstruction(*Pred->getInstr())) { in moveLowLatencies()
1788 if (Pred->NodeNum >= DAGSize) in moveLowLatencies()
1790 unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum]; in moveLowLatencies()
1795 if (SITII->isLowLatencyInstruction(*SU->getInstr())) { in moveLowLatencies()
1802 for (unsigned u = i; u > BestPos; --u) { in moveLowLatencies()
1803 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]]; in moveLowLatencies()
1804 ScheduledSUnits[u] = ScheduledSUnits[u-1]; in moveLowLatencies()
1806 ScheduledSUnits[BestPos] = SU->NodeNum; in moveLowLatencies()
1807 ScheduledSUnitsInv[SU->NodeNum] = BestPos; in moveLowLatencies()
1815 // the low latency instructions too. in moveLowLatencies()
1816 } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) { in moveLowLatencies()
1818 for (SDep& SuccDep : SU->Succs) { in moveLowLatencies()
1820 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) in moveLowLatencies()
1822 if (SITII->isLowLatencyInstruction(*Succ->getInstr())) { in moveLowLatencies()
1829 for (unsigned u = i; u > MinPos; --u) { in moveLowLatencies()
1830 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]]; in moveLowLatencies()
1831 ScheduledSUnits[u] = ScheduledSUnits[u-1]; in moveLowLatencies()
1833 ScheduledSUnits[MinPos] = SU->NodeNum; in moveLowLatencies()
1834 ScheduledSUnitsInv[SU->NodeNum] = MinPos; in moveLowLatencies()
1892 SchedImpl->initialize(this); in schedule()
1910 if (SITII->isLowLatencyInstruction(*SU->getInstr())) { in schedule()
1913 if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg, in schedule()
1916 } else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode())) in schedule()
1986 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " in schedule()
1987 << *SU->getInstr()); in schedule()
1996 << printMBBReference(*begin()->getParent()) << " ***\n"; in schedule()