10b57cec5SDimitry Andric //===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// SI Machine Scheduler interface
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric
140b57cec5SDimitry Andric #include "SIMachineScheduler.h"
150b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
16*0fca6ea1SDimitry Andric #include "SIInstrInfo.h"
170b57cec5SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
190b57cec5SDimitry Andric
200b57cec5SDimitry Andric using namespace llvm;
210b57cec5SDimitry Andric
220b57cec5SDimitry Andric #define DEBUG_TYPE "machine-scheduler"
230b57cec5SDimitry Andric
240b57cec5SDimitry Andric // This scheduler implements a different scheduling algorithm than
250b57cec5SDimitry Andric // GenericScheduler.
260b57cec5SDimitry Andric //
270b57cec5SDimitry Andric // There are several specific architecture behaviours that can't be modelled
280b57cec5SDimitry Andric // for GenericScheduler:
290b57cec5SDimitry Andric // . When accessing the result of an SGPR load instruction, you have to wait
300b57cec5SDimitry Andric // for all the SGPR load instructions before your current instruction to
310b57cec5SDimitry Andric // have finished.
320b57cec5SDimitry Andric // . When accessing the result of an VGPR load instruction, you have to wait
330b57cec5SDimitry Andric // for all the VGPR load instructions previous to the VGPR load instruction
340b57cec5SDimitry Andric // you are interested in to finish.
350b57cec5SDimitry Andric // . The less the register pressure, the best load latencies are hidden
360b57cec5SDimitry Andric //
370b57cec5SDimitry Andric // Moreover some specifities (like the fact a lot of instructions in the shader
380b57cec5SDimitry Andric // have few dependencies) makes the generic scheduler have some unpredictable
390b57cec5SDimitry Andric // behaviours. For example when register pressure becomes high, it can either
400b57cec5SDimitry Andric // manage to prevent register pressure from going too high, or it can
410b57cec5SDimitry Andric // increase register pressure even more than if it hadn't taken register
420b57cec5SDimitry Andric // pressure into account.
430b57cec5SDimitry Andric //
440b57cec5SDimitry Andric // Also some other bad behaviours are generated, like loading at the beginning
450b57cec5SDimitry Andric // of the shader a constant in VGPR you won't need until the end of the shader.
460b57cec5SDimitry Andric //
470b57cec5SDimitry Andric // The scheduling problem for SI can distinguish three main parts:
480b57cec5SDimitry Andric // . Hiding high latencies (texture sampling, etc)
490b57cec5SDimitry Andric // . Hiding low latencies (SGPR constant loading, etc)
500b57cec5SDimitry Andric // . Keeping register usage low for better latency hiding and general
510b57cec5SDimitry Andric // performance
520b57cec5SDimitry Andric //
530b57cec5SDimitry Andric // Some other things can also affect performance, but are hard to predict
540b57cec5SDimitry Andric // (cache usage, the fact the HW can issue several instructions from different
550b57cec5SDimitry Andric // wavefronts if different types, etc)
560b57cec5SDimitry Andric //
570b57cec5SDimitry Andric // This scheduler tries to solve the scheduling problem by dividing it into
580b57cec5SDimitry Andric // simpler sub-problems. It divides the instructions into blocks, schedules
590b57cec5SDimitry Andric // locally inside the blocks where it takes care of low latencies, and then
600b57cec5SDimitry Andric // chooses the order of the blocks by taking care of high latencies.
610b57cec5SDimitry Andric // Dividing the instructions into blocks helps control keeping register
620b57cec5SDimitry Andric // usage low.
630b57cec5SDimitry Andric //
640b57cec5SDimitry Andric // First the instructions are put into blocks.
650b57cec5SDimitry Andric // We want the blocks help control register usage and hide high latencies
660b57cec5SDimitry Andric // later. To help control register usage, we typically want all local
6781ad6265SDimitry Andric // computations, when for example you create a result that can be consumed
680b57cec5SDimitry Andric // right away, to be contained in a block. Block inputs and outputs would
690b57cec5SDimitry Andric // typically be important results that are needed in several locations of
700b57cec5SDimitry Andric // the shader. Since we do want blocks to help hide high latencies, we want
710b57cec5SDimitry Andric // the instructions inside the block to have a minimal set of dependencies
720b57cec5SDimitry Andric // on high latencies. It will make it easy to pick blocks to hide specific
730b57cec5SDimitry Andric // high latencies.
740b57cec5SDimitry Andric // The block creation algorithm is divided into several steps, and several
750b57cec5SDimitry Andric // variants can be tried during the scheduling process.
760b57cec5SDimitry Andric //
770b57cec5SDimitry Andric // Second the order of the instructions inside the blocks is chosen.
780b57cec5SDimitry Andric // At that step we do take into account only register usage and hiding
790b57cec5SDimitry Andric // low latency instructions
800b57cec5SDimitry Andric //
810b57cec5SDimitry Andric // Third the block order is chosen, there we try to hide high latencies
820b57cec5SDimitry Andric // and keep register usage low.
830b57cec5SDimitry Andric //
840b57cec5SDimitry Andric // After the third step, a pass is done to improve the hiding of low
850b57cec5SDimitry Andric // latencies.
860b57cec5SDimitry Andric //
870b57cec5SDimitry Andric // Actually when talking about 'low latency' or 'high latency' it includes
880b57cec5SDimitry Andric // both the latency to get the cache (or global mem) data go to the register,
890b57cec5SDimitry Andric // and the bandwidth limitations.
900b57cec5SDimitry Andric // Increasing the number of active wavefronts helps hide the former, but it
910b57cec5SDimitry Andric // doesn't solve the latter, thus why even if wavefront count is high, we have
920b57cec5SDimitry Andric // to try have as many instructions hiding high latencies as possible.
9381ad6265SDimitry Andric // The OpenCL doc says for example latency of 400 cycles for a global mem
9481ad6265SDimitry Andric // access, which is hidden by 10 instructions if the wavefront count is 10.
950b57cec5SDimitry Andric
960b57cec5SDimitry Andric // Some figures taken from AMD docs:
970b57cec5SDimitry Andric // Both texture and constant L1 caches are 4-way associative with 64 bytes
980b57cec5SDimitry Andric // lines.
990b57cec5SDimitry Andric // Constant cache is shared with 4 CUs.
1000b57cec5SDimitry Andric // For texture sampling, the address generation unit receives 4 texture
1010b57cec5SDimitry Andric // addresses per cycle, thus we could expect texture sampling latency to be
1020b57cec5SDimitry Andric // equivalent to 4 instructions in the very best case (a VGPR is 64 work items,
1030b57cec5SDimitry Andric // instructions in a wavefront group are executed every 4 cycles),
1040b57cec5SDimitry Andric // or 16 instructions if the other wavefronts associated to the 3 other VALUs
1050b57cec5SDimitry Andric // of the CU do texture sampling too. (Don't take these figures too seriously,
1060b57cec5SDimitry Andric // as I'm not 100% sure of the computation)
1070b57cec5SDimitry Andric // Data exports should get similar latency.
1080b57cec5SDimitry Andric // For constant loading, the cache is shader with 4 CUs.
1090b57cec5SDimitry Andric // The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
1100b57cec5SDimitry Andric // I guess if the other CU don't read the cache, it can go up to 64B/cycle.
1110b57cec5SDimitry Andric // It means a simple s_buffer_load should take one instruction to hide, as
1120b57cec5SDimitry Andric // well as a s_buffer_loadx2 and potentially a s_buffer_loadx8 if on the same
1130b57cec5SDimitry Andric // cache line.
1140b57cec5SDimitry Andric //
1150b57cec5SDimitry Andric // As of today the driver doesn't preload the constants in cache, thus the
1160b57cec5SDimitry Andric // first loads get extra latency. The doc says global memory access can be
1170b57cec5SDimitry Andric // 300-600 cycles. We do not specially take that into account when scheduling
1180b57cec5SDimitry Andric // As we expect the driver to be able to preload the constants soon.
1190b57cec5SDimitry Andric
1200b57cec5SDimitry Andric // common code //
1210b57cec5SDimitry Andric
1220b57cec5SDimitry Andric #ifndef NDEBUG
1230b57cec5SDimitry Andric
getReasonStr(SIScheduleCandReason Reason)1240b57cec5SDimitry Andric static const char *getReasonStr(SIScheduleCandReason Reason) {
1250b57cec5SDimitry Andric switch (Reason) {
1260b57cec5SDimitry Andric case NoCand: return "NOCAND";
1270b57cec5SDimitry Andric case RegUsage: return "REGUSAGE";
1280b57cec5SDimitry Andric case Latency: return "LATENCY";
1290b57cec5SDimitry Andric case Successor: return "SUCCESSOR";
1300b57cec5SDimitry Andric case Depth: return "DEPTH";
1310b57cec5SDimitry Andric case NodeOrder: return "ORDER";
1320b57cec5SDimitry Andric }
1330b57cec5SDimitry Andric llvm_unreachable("Unknown reason!");
1340b57cec5SDimitry Andric }
1350b57cec5SDimitry Andric
1360b57cec5SDimitry Andric #endif
1370b57cec5SDimitry Andric
138*0fca6ea1SDimitry Andric namespace llvm::SISched {
tryLess(int TryVal,int CandVal,SISchedulerCandidate & TryCand,SISchedulerCandidate & Cand,SIScheduleCandReason Reason)1390b57cec5SDimitry Andric static bool tryLess(int TryVal, int CandVal,
1400b57cec5SDimitry Andric SISchedulerCandidate &TryCand,
1410b57cec5SDimitry Andric SISchedulerCandidate &Cand,
1420b57cec5SDimitry Andric SIScheduleCandReason Reason) {
1430b57cec5SDimitry Andric if (TryVal < CandVal) {
1440b57cec5SDimitry Andric TryCand.Reason = Reason;
1450b57cec5SDimitry Andric return true;
1460b57cec5SDimitry Andric }
1470b57cec5SDimitry Andric if (TryVal > CandVal) {
1480b57cec5SDimitry Andric if (Cand.Reason > Reason)
1490b57cec5SDimitry Andric Cand.Reason = Reason;
1500b57cec5SDimitry Andric return true;
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric Cand.setRepeat(Reason);
1530b57cec5SDimitry Andric return false;
1540b57cec5SDimitry Andric }
1550b57cec5SDimitry Andric
tryGreater(int TryVal,int CandVal,SISchedulerCandidate & TryCand,SISchedulerCandidate & Cand,SIScheduleCandReason Reason)1560b57cec5SDimitry Andric static bool tryGreater(int TryVal, int CandVal,
1570b57cec5SDimitry Andric SISchedulerCandidate &TryCand,
1580b57cec5SDimitry Andric SISchedulerCandidate &Cand,
1590b57cec5SDimitry Andric SIScheduleCandReason Reason) {
1600b57cec5SDimitry Andric if (TryVal > CandVal) {
1610b57cec5SDimitry Andric TryCand.Reason = Reason;
1620b57cec5SDimitry Andric return true;
1630b57cec5SDimitry Andric }
1640b57cec5SDimitry Andric if (TryVal < CandVal) {
1650b57cec5SDimitry Andric if (Cand.Reason > Reason)
1660b57cec5SDimitry Andric Cand.Reason = Reason;
1670b57cec5SDimitry Andric return true;
1680b57cec5SDimitry Andric }
1690b57cec5SDimitry Andric Cand.setRepeat(Reason);
1700b57cec5SDimitry Andric return false;
1710b57cec5SDimitry Andric }
172*0fca6ea1SDimitry Andric } // end namespace llvm::SISched
1730b57cec5SDimitry Andric
1740b57cec5SDimitry Andric // SIScheduleBlock //
1750b57cec5SDimitry Andric
addUnit(SUnit * SU)1760b57cec5SDimitry Andric void SIScheduleBlock::addUnit(SUnit *SU) {
1770b57cec5SDimitry Andric NodeNum2Index[SU->NodeNum] = SUnits.size();
1780b57cec5SDimitry Andric SUnits.push_back(SU);
1790b57cec5SDimitry Andric }
1800b57cec5SDimitry Andric
1810b57cec5SDimitry Andric #ifndef NDEBUG
traceCandidate(const SISchedCandidate & Cand)1820b57cec5SDimitry Andric void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
1830b57cec5SDimitry Andric
1840b57cec5SDimitry Andric dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
1850b57cec5SDimitry Andric dbgs() << '\n';
1860b57cec5SDimitry Andric }
1870b57cec5SDimitry Andric #endif
1880b57cec5SDimitry Andric
tryCandidateTopDown(SISchedCandidate & Cand,SISchedCandidate & TryCand)1890b57cec5SDimitry Andric void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
1900b57cec5SDimitry Andric SISchedCandidate &TryCand) {
1910b57cec5SDimitry Andric // Initialize the candidate if needed.
1920b57cec5SDimitry Andric if (!Cand.isValid()) {
1930b57cec5SDimitry Andric TryCand.Reason = NodeOrder;
1940b57cec5SDimitry Andric return;
1950b57cec5SDimitry Andric }
1960b57cec5SDimitry Andric
1970b57cec5SDimitry Andric if (Cand.SGPRUsage > 60 &&
1980b57cec5SDimitry Andric SISched::tryLess(TryCand.SGPRUsage, Cand.SGPRUsage,
1990b57cec5SDimitry Andric TryCand, Cand, RegUsage))
2000b57cec5SDimitry Andric return;
2010b57cec5SDimitry Andric
2020b57cec5SDimitry Andric // Schedule low latency instructions as top as possible.
2030b57cec5SDimitry Andric // Order of priority is:
2040b57cec5SDimitry Andric // . Low latency instructions which do not depend on other low latency
2050b57cec5SDimitry Andric // instructions we haven't waited for
2060b57cec5SDimitry Andric // . Other instructions which do not depend on low latency instructions
2070b57cec5SDimitry Andric // we haven't waited for
2080b57cec5SDimitry Andric // . Low latencies
2090b57cec5SDimitry Andric // . All other instructions
2100b57cec5SDimitry Andric // Goal is to get: low latency instructions - independent instructions
2110b57cec5SDimitry Andric // - (eventually some more low latency instructions)
2120b57cec5SDimitry Andric // - instructions that depend on the first low latency instructions.
2130b57cec5SDimitry Andric // If in the block there is a lot of constant loads, the SGPR usage
2140b57cec5SDimitry Andric // could go quite high, thus above the arbitrary limit of 60 will encourage
2150b57cec5SDimitry Andric // use the already loaded constants (in order to release some SGPRs) before
2160b57cec5SDimitry Andric // loading more.
2170b57cec5SDimitry Andric if (SISched::tryLess(TryCand.HasLowLatencyNonWaitedParent,
2180b57cec5SDimitry Andric Cand.HasLowLatencyNonWaitedParent,
2190b57cec5SDimitry Andric TryCand, Cand, SIScheduleCandReason::Depth))
2200b57cec5SDimitry Andric return;
2210b57cec5SDimitry Andric
2220b57cec5SDimitry Andric if (SISched::tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
2230b57cec5SDimitry Andric TryCand, Cand, SIScheduleCandReason::Depth))
2240b57cec5SDimitry Andric return;
2250b57cec5SDimitry Andric
2260b57cec5SDimitry Andric if (TryCand.IsLowLatency &&
2270b57cec5SDimitry Andric SISched::tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
2280b57cec5SDimitry Andric TryCand, Cand, SIScheduleCandReason::Depth))
2290b57cec5SDimitry Andric return;
2300b57cec5SDimitry Andric
2310b57cec5SDimitry Andric if (SISched::tryLess(TryCand.VGPRUsage, Cand.VGPRUsage,
2320b57cec5SDimitry Andric TryCand, Cand, RegUsage))
2330b57cec5SDimitry Andric return;
2340b57cec5SDimitry Andric
2350b57cec5SDimitry Andric // Fall through to original instruction order.
2360b57cec5SDimitry Andric if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
2370b57cec5SDimitry Andric TryCand.Reason = NodeOrder;
2380b57cec5SDimitry Andric }
2390b57cec5SDimitry Andric }
2400b57cec5SDimitry Andric
pickNode()2410b57cec5SDimitry Andric SUnit* SIScheduleBlock::pickNode() {
2420b57cec5SDimitry Andric SISchedCandidate TopCand;
2430b57cec5SDimitry Andric
2440b57cec5SDimitry Andric for (SUnit* SU : TopReadySUs) {
2450b57cec5SDimitry Andric SISchedCandidate TryCand;
2460b57cec5SDimitry Andric std::vector<unsigned> pressure;
2470b57cec5SDimitry Andric std::vector<unsigned> MaxPressure;
2480b57cec5SDimitry Andric // Predict register usage after this instruction.
2490b57cec5SDimitry Andric TryCand.SU = SU;
2500b57cec5SDimitry Andric TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
2515ffd83dbSDimitry Andric TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
2525ffd83dbSDimitry Andric TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
2530b57cec5SDimitry Andric TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
2540b57cec5SDimitry Andric TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
2550b57cec5SDimitry Andric TryCand.HasLowLatencyNonWaitedParent =
2560b57cec5SDimitry Andric HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
2570b57cec5SDimitry Andric tryCandidateTopDown(TopCand, TryCand);
2580b57cec5SDimitry Andric if (TryCand.Reason != NoCand)
2590b57cec5SDimitry Andric TopCand.setBest(TryCand);
2600b57cec5SDimitry Andric }
2610b57cec5SDimitry Andric
2620b57cec5SDimitry Andric return TopCand.SU;
2630b57cec5SDimitry Andric }
2640b57cec5SDimitry Andric
2650b57cec5SDimitry Andric
2660b57cec5SDimitry Andric // Schedule something valid.
fastSchedule()2670b57cec5SDimitry Andric void SIScheduleBlock::fastSchedule() {
2680b57cec5SDimitry Andric TopReadySUs.clear();
2690b57cec5SDimitry Andric if (Scheduled)
2700b57cec5SDimitry Andric undoSchedule();
2710b57cec5SDimitry Andric
2720b57cec5SDimitry Andric for (SUnit* SU : SUnits) {
2730b57cec5SDimitry Andric if (!SU->NumPredsLeft)
2740b57cec5SDimitry Andric TopReadySUs.push_back(SU);
2750b57cec5SDimitry Andric }
2760b57cec5SDimitry Andric
2770b57cec5SDimitry Andric while (!TopReadySUs.empty()) {
2780b57cec5SDimitry Andric SUnit *SU = TopReadySUs[0];
2790b57cec5SDimitry Andric ScheduledSUnits.push_back(SU);
2800b57cec5SDimitry Andric nodeScheduled(SU);
2810b57cec5SDimitry Andric }
2820b57cec5SDimitry Andric
2830b57cec5SDimitry Andric Scheduled = true;
2840b57cec5SDimitry Andric }
2850b57cec5SDimitry Andric
2860b57cec5SDimitry Andric // Returns if the register was set between first and last.
isDefBetween(unsigned Reg,SlotIndex First,SlotIndex Last,const MachineRegisterInfo * MRI,const LiveIntervals * LIS)2870b57cec5SDimitry Andric static bool isDefBetween(unsigned Reg,
2880b57cec5SDimitry Andric SlotIndex First, SlotIndex Last,
2890b57cec5SDimitry Andric const MachineRegisterInfo *MRI,
2900b57cec5SDimitry Andric const LiveIntervals *LIS) {
2910b57cec5SDimitry Andric for (MachineRegisterInfo::def_instr_iterator
2920b57cec5SDimitry Andric UI = MRI->def_instr_begin(Reg),
2930b57cec5SDimitry Andric UE = MRI->def_instr_end(); UI != UE; ++UI) {
2940b57cec5SDimitry Andric const MachineInstr* MI = &*UI;
2950b57cec5SDimitry Andric if (MI->isDebugValue())
2960b57cec5SDimitry Andric continue;
2970b57cec5SDimitry Andric SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
2980b57cec5SDimitry Andric if (InstSlot >= First && InstSlot <= Last)
2990b57cec5SDimitry Andric return true;
3000b57cec5SDimitry Andric }
3010b57cec5SDimitry Andric return false;
3020b57cec5SDimitry Andric }
3030b57cec5SDimitry Andric
initRegPressure(MachineBasicBlock::iterator BeginBlock,MachineBasicBlock::iterator EndBlock)3040b57cec5SDimitry Andric void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
3050b57cec5SDimitry Andric MachineBasicBlock::iterator EndBlock) {
3060b57cec5SDimitry Andric IntervalPressure Pressure, BotPressure;
3070b57cec5SDimitry Andric RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
3080b57cec5SDimitry Andric LiveIntervals *LIS = DAG->getLIS();
3090b57cec5SDimitry Andric MachineRegisterInfo *MRI = DAG->getMRI();
3100b57cec5SDimitry Andric DAG->initRPTracker(TopRPTracker);
3110b57cec5SDimitry Andric DAG->initRPTracker(BotRPTracker);
3120b57cec5SDimitry Andric DAG->initRPTracker(RPTracker);
3130b57cec5SDimitry Andric
3140b57cec5SDimitry Andric // Goes though all SU. RPTracker captures what had to be alive for the SUs
3150b57cec5SDimitry Andric // to execute, and what is still alive at the end.
3160b57cec5SDimitry Andric for (SUnit* SU : ScheduledSUnits) {
3170b57cec5SDimitry Andric RPTracker.setPos(SU->getInstr());
3180b57cec5SDimitry Andric RPTracker.advance();
3190b57cec5SDimitry Andric }
3200b57cec5SDimitry Andric
3210b57cec5SDimitry Andric // Close the RPTracker to finalize live ins/outs.
3220b57cec5SDimitry Andric RPTracker.closeRegion();
3230b57cec5SDimitry Andric
3240b57cec5SDimitry Andric // Initialize the live ins and live outs.
3250b57cec5SDimitry Andric TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
3260b57cec5SDimitry Andric BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
3270b57cec5SDimitry Andric
3280b57cec5SDimitry Andric // Do not Track Physical Registers, because it messes up.
3290b57cec5SDimitry Andric for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
330bdd1243dSDimitry Andric if (RegMaskPair.RegUnit.isVirtual())
3310b57cec5SDimitry Andric LiveInRegs.insert(RegMaskPair.RegUnit);
3320b57cec5SDimitry Andric }
3330b57cec5SDimitry Andric LiveOutRegs.clear();
3340b57cec5SDimitry Andric // There is several possibilities to distinguish:
3350b57cec5SDimitry Andric // 1) Reg is not input to any instruction in the block, but is output of one
3360b57cec5SDimitry Andric // 2) 1) + read in the block and not needed after it
3370b57cec5SDimitry Andric // 3) 1) + read in the block but needed in another block
3380b57cec5SDimitry Andric // 4) Reg is input of an instruction but another block will read it too
3390b57cec5SDimitry Andric // 5) Reg is input of an instruction and then rewritten in the block.
3400b57cec5SDimitry Andric // result is not read in the block (implies used in another block)
3410b57cec5SDimitry Andric // 6) Reg is input of an instruction and then rewritten in the block.
3420b57cec5SDimitry Andric // result is read in the block and not needed in another block
3430b57cec5SDimitry Andric // 7) Reg is input of an instruction and then rewritten in the block.
3440b57cec5SDimitry Andric // result is read in the block but also needed in another block
3450b57cec5SDimitry Andric // LiveInRegs will contains all the regs in situation 4, 5, 6, 7
3460b57cec5SDimitry Andric // We want LiveOutRegs to contain only Regs whose content will be read after
3470b57cec5SDimitry Andric // in another block, and whose content was written in the current block,
3480b57cec5SDimitry Andric // that is we want it to get 1, 3, 5, 7
3490b57cec5SDimitry Andric // Since we made the MIs of a block to be packed all together before
3500b57cec5SDimitry Andric // scheduling, then the LiveIntervals were correct, and the RPTracker was
3510b57cec5SDimitry Andric // able to correctly handle 5 vs 6, 2 vs 3.
3520b57cec5SDimitry Andric // (Note: This is not sufficient for RPTracker to not do mistakes for case 4)
3530b57cec5SDimitry Andric // The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7
35481ad6265SDimitry Andric // Comparing to LiveInRegs is not sufficient to differentiate 4 vs 5, 7
3550b57cec5SDimitry Andric // The use of findDefBetween removes the case 4.
3560b57cec5SDimitry Andric for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
357e8d8bef9SDimitry Andric Register Reg = RegMaskPair.RegUnit;
358e8d8bef9SDimitry Andric if (Reg.isVirtual() &&
3590b57cec5SDimitry Andric isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
3600b57cec5SDimitry Andric LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
3610b57cec5SDimitry Andric LIS)) {
3620b57cec5SDimitry Andric LiveOutRegs.insert(Reg);
3630b57cec5SDimitry Andric }
3640b57cec5SDimitry Andric }
3650b57cec5SDimitry Andric
3660b57cec5SDimitry Andric // Pressure = sum_alive_registers register size
3670b57cec5SDimitry Andric // Internally llvm will represent some registers as big 128 bits registers
3680b57cec5SDimitry Andric // for example, but they actually correspond to 4 actual 32 bits registers.
3690b57cec5SDimitry Andric // Thus Pressure is not equal to num_alive_registers * constant.
3700b57cec5SDimitry Andric LiveInPressure = TopPressure.MaxSetPressure;
3710b57cec5SDimitry Andric LiveOutPressure = BotPressure.MaxSetPressure;
3720b57cec5SDimitry Andric
3730b57cec5SDimitry Andric // Prepares TopRPTracker for top down scheduling.
3740b57cec5SDimitry Andric TopRPTracker.closeTop();
3750b57cec5SDimitry Andric }
3760b57cec5SDimitry Andric
schedule(MachineBasicBlock::iterator BeginBlock,MachineBasicBlock::iterator EndBlock)3770b57cec5SDimitry Andric void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
3780b57cec5SDimitry Andric MachineBasicBlock::iterator EndBlock) {
3790b57cec5SDimitry Andric if (!Scheduled)
3800b57cec5SDimitry Andric fastSchedule();
3810b57cec5SDimitry Andric
3820b57cec5SDimitry Andric // PreScheduling phase to set LiveIn and LiveOut.
3830b57cec5SDimitry Andric initRegPressure(BeginBlock, EndBlock);
3840b57cec5SDimitry Andric undoSchedule();
3850b57cec5SDimitry Andric
3860b57cec5SDimitry Andric // Schedule for real now.
3870b57cec5SDimitry Andric
3880b57cec5SDimitry Andric TopReadySUs.clear();
3890b57cec5SDimitry Andric
3900b57cec5SDimitry Andric for (SUnit* SU : SUnits) {
3910b57cec5SDimitry Andric if (!SU->NumPredsLeft)
3920b57cec5SDimitry Andric TopReadySUs.push_back(SU);
3930b57cec5SDimitry Andric }
3940b57cec5SDimitry Andric
3950b57cec5SDimitry Andric while (!TopReadySUs.empty()) {
3960b57cec5SDimitry Andric SUnit *SU = pickNode();
3970b57cec5SDimitry Andric ScheduledSUnits.push_back(SU);
3980b57cec5SDimitry Andric TopRPTracker.setPos(SU->getInstr());
3990b57cec5SDimitry Andric TopRPTracker.advance();
4000b57cec5SDimitry Andric nodeScheduled(SU);
4010b57cec5SDimitry Andric }
4020b57cec5SDimitry Andric
40381ad6265SDimitry Andric // TODO: compute InternalAdditionalPressure.
404349cc55cSDimitry Andric InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size());
4050b57cec5SDimitry Andric
4060b57cec5SDimitry Andric // Check everything is right.
4070b57cec5SDimitry Andric #ifndef NDEBUG
4080b57cec5SDimitry Andric assert(SUnits.size() == ScheduledSUnits.size() &&
4090b57cec5SDimitry Andric TopReadySUs.empty());
4100b57cec5SDimitry Andric for (SUnit* SU : SUnits) {
4110b57cec5SDimitry Andric assert(SU->isScheduled &&
4120b57cec5SDimitry Andric SU->NumPredsLeft == 0);
4130b57cec5SDimitry Andric }
4140b57cec5SDimitry Andric #endif
4150b57cec5SDimitry Andric
4160b57cec5SDimitry Andric Scheduled = true;
4170b57cec5SDimitry Andric }
4180b57cec5SDimitry Andric
undoSchedule()4190b57cec5SDimitry Andric void SIScheduleBlock::undoSchedule() {
4200b57cec5SDimitry Andric for (SUnit* SU : SUnits) {
4210b57cec5SDimitry Andric SU->isScheduled = false;
4220b57cec5SDimitry Andric for (SDep& Succ : SU->Succs) {
4230b57cec5SDimitry Andric if (BC->isSUInBlock(Succ.getSUnit(), ID))
4240b57cec5SDimitry Andric undoReleaseSucc(SU, &Succ);
4250b57cec5SDimitry Andric }
4260b57cec5SDimitry Andric }
4270b57cec5SDimitry Andric HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
4280b57cec5SDimitry Andric ScheduledSUnits.clear();
4290b57cec5SDimitry Andric Scheduled = false;
4300b57cec5SDimitry Andric }
4310b57cec5SDimitry Andric
undoReleaseSucc(SUnit * SU,SDep * SuccEdge)4320b57cec5SDimitry Andric void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
4330b57cec5SDimitry Andric SUnit *SuccSU = SuccEdge->getSUnit();
4340b57cec5SDimitry Andric
4350b57cec5SDimitry Andric if (SuccEdge->isWeak()) {
4360b57cec5SDimitry Andric ++SuccSU->WeakPredsLeft;
4370b57cec5SDimitry Andric return;
4380b57cec5SDimitry Andric }
4390b57cec5SDimitry Andric ++SuccSU->NumPredsLeft;
4400b57cec5SDimitry Andric }
4410b57cec5SDimitry Andric
releaseSucc(SUnit * SU,SDep * SuccEdge)4420b57cec5SDimitry Andric void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
4430b57cec5SDimitry Andric SUnit *SuccSU = SuccEdge->getSUnit();
4440b57cec5SDimitry Andric
4450b57cec5SDimitry Andric if (SuccEdge->isWeak()) {
4460b57cec5SDimitry Andric --SuccSU->WeakPredsLeft;
4470b57cec5SDimitry Andric return;
4480b57cec5SDimitry Andric }
4490b57cec5SDimitry Andric #ifndef NDEBUG
4500b57cec5SDimitry Andric if (SuccSU->NumPredsLeft == 0) {
4510b57cec5SDimitry Andric dbgs() << "*** Scheduling failed! ***\n";
4520b57cec5SDimitry Andric DAG->dumpNode(*SuccSU);
4530b57cec5SDimitry Andric dbgs() << " has been released too many times!\n";
4540b57cec5SDimitry Andric llvm_unreachable(nullptr);
4550b57cec5SDimitry Andric }
4560b57cec5SDimitry Andric #endif
4570b57cec5SDimitry Andric
4580b57cec5SDimitry Andric --SuccSU->NumPredsLeft;
4590b57cec5SDimitry Andric }
4600b57cec5SDimitry Andric
4610b57cec5SDimitry Andric /// Release Successors of the SU that are in the block or not.
releaseSuccessors(SUnit * SU,bool InOrOutBlock)4620b57cec5SDimitry Andric void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
4630b57cec5SDimitry Andric for (SDep& Succ : SU->Succs) {
4640b57cec5SDimitry Andric SUnit *SuccSU = Succ.getSUnit();
4650b57cec5SDimitry Andric
4660b57cec5SDimitry Andric if (SuccSU->NodeNum >= DAG->SUnits.size())
4670b57cec5SDimitry Andric continue;
4680b57cec5SDimitry Andric
4690b57cec5SDimitry Andric if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
4700b57cec5SDimitry Andric continue;
4710b57cec5SDimitry Andric
4720b57cec5SDimitry Andric releaseSucc(SU, &Succ);
4730b57cec5SDimitry Andric if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
4740b57cec5SDimitry Andric TopReadySUs.push_back(SuccSU);
4750b57cec5SDimitry Andric }
4760b57cec5SDimitry Andric }
4770b57cec5SDimitry Andric
nodeScheduled(SUnit * SU)4780b57cec5SDimitry Andric void SIScheduleBlock::nodeScheduled(SUnit *SU) {
4790b57cec5SDimitry Andric // Is in TopReadySUs
4800b57cec5SDimitry Andric assert (!SU->NumPredsLeft);
4810b57cec5SDimitry Andric std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
4820b57cec5SDimitry Andric if (I == TopReadySUs.end()) {
4830b57cec5SDimitry Andric dbgs() << "Data Structure Bug in SI Scheduler\n";
4840b57cec5SDimitry Andric llvm_unreachable(nullptr);
4850b57cec5SDimitry Andric }
4860b57cec5SDimitry Andric TopReadySUs.erase(I);
4870b57cec5SDimitry Andric
4880b57cec5SDimitry Andric releaseSuccessors(SU, true);
4890b57cec5SDimitry Andric // Scheduling this node will trigger a wait,
4900b57cec5SDimitry Andric // thus propagate to other instructions that they do not need to wait either.
4910b57cec5SDimitry Andric if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
4920b57cec5SDimitry Andric HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
4930b57cec5SDimitry Andric
4940b57cec5SDimitry Andric if (DAG->IsLowLatencySU[SU->NodeNum]) {
4950b57cec5SDimitry Andric for (SDep& Succ : SU->Succs) {
4960b57cec5SDimitry Andric std::map<unsigned, unsigned>::iterator I =
4970b57cec5SDimitry Andric NodeNum2Index.find(Succ.getSUnit()->NodeNum);
4980b57cec5SDimitry Andric if (I != NodeNum2Index.end())
4990b57cec5SDimitry Andric HasLowLatencyNonWaitedParent[I->second] = 1;
5000b57cec5SDimitry Andric }
5010b57cec5SDimitry Andric }
5020b57cec5SDimitry Andric SU->isScheduled = true;
5030b57cec5SDimitry Andric }
5040b57cec5SDimitry Andric
finalizeUnits()5050b57cec5SDimitry Andric void SIScheduleBlock::finalizeUnits() {
5060b57cec5SDimitry Andric // We remove links from outside blocks to enable scheduling inside the block.
5070b57cec5SDimitry Andric for (SUnit* SU : SUnits) {
5080b57cec5SDimitry Andric releaseSuccessors(SU, false);
5090b57cec5SDimitry Andric if (DAG->IsHighLatencySU[SU->NodeNum])
5100b57cec5SDimitry Andric HighLatencyBlock = true;
5110b57cec5SDimitry Andric }
5120b57cec5SDimitry Andric HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
5130b57cec5SDimitry Andric }
5140b57cec5SDimitry Andric
5150b57cec5SDimitry Andric // we maintain ascending order of IDs
addPred(SIScheduleBlock * Pred)5160b57cec5SDimitry Andric void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
5170b57cec5SDimitry Andric unsigned PredID = Pred->getID();
5180b57cec5SDimitry Andric
5190b57cec5SDimitry Andric // Check if not already predecessor.
5200b57cec5SDimitry Andric for (SIScheduleBlock* P : Preds) {
5210b57cec5SDimitry Andric if (PredID == P->getID())
5220b57cec5SDimitry Andric return;
5230b57cec5SDimitry Andric }
5240b57cec5SDimitry Andric Preds.push_back(Pred);
5250b57cec5SDimitry Andric
5260b57cec5SDimitry Andric assert(none_of(Succs,
5270b57cec5SDimitry Andric [=](std::pair<SIScheduleBlock*,
5280b57cec5SDimitry Andric SIScheduleBlockLinkKind> S) {
5290b57cec5SDimitry Andric return PredID == S.first->getID();
5300b57cec5SDimitry Andric }) &&
5310b57cec5SDimitry Andric "Loop in the Block Graph!");
5320b57cec5SDimitry Andric }
5330b57cec5SDimitry Andric
addSucc(SIScheduleBlock * Succ,SIScheduleBlockLinkKind Kind)5340b57cec5SDimitry Andric void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
5350b57cec5SDimitry Andric SIScheduleBlockLinkKind Kind) {
5360b57cec5SDimitry Andric unsigned SuccID = Succ->getID();
5370b57cec5SDimitry Andric
5380b57cec5SDimitry Andric // Check if not already predecessor.
5390b57cec5SDimitry Andric for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
5400b57cec5SDimitry Andric if (SuccID == S.first->getID()) {
5410b57cec5SDimitry Andric if (S.second == SIScheduleBlockLinkKind::NoData &&
5420b57cec5SDimitry Andric Kind == SIScheduleBlockLinkKind::Data)
5430b57cec5SDimitry Andric S.second = Kind;
5440b57cec5SDimitry Andric return;
5450b57cec5SDimitry Andric }
5460b57cec5SDimitry Andric }
5470b57cec5SDimitry Andric if (Succ->isHighLatencyBlock())
5480b57cec5SDimitry Andric ++NumHighLatencySuccessors;
549*0fca6ea1SDimitry Andric Succs.emplace_back(Succ, Kind);
5500b57cec5SDimitry Andric
5510b57cec5SDimitry Andric assert(none_of(Preds,
5520b57cec5SDimitry Andric [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&
5530b57cec5SDimitry Andric "Loop in the Block Graph!");
5540b57cec5SDimitry Andric }
5550b57cec5SDimitry Andric
5560b57cec5SDimitry Andric #ifndef NDEBUG
printDebug(bool full)5570b57cec5SDimitry Andric void SIScheduleBlock::printDebug(bool full) {
5580b57cec5SDimitry Andric dbgs() << "Block (" << ID << ")\n";
5590b57cec5SDimitry Andric if (!full)
5600b57cec5SDimitry Andric return;
5610b57cec5SDimitry Andric
5620b57cec5SDimitry Andric dbgs() << "\nContains High Latency Instruction: "
5630b57cec5SDimitry Andric << HighLatencyBlock << '\n';
5640b57cec5SDimitry Andric dbgs() << "\nDepends On:\n";
5650b57cec5SDimitry Andric for (SIScheduleBlock* P : Preds) {
5660b57cec5SDimitry Andric P->printDebug(false);
5670b57cec5SDimitry Andric }
5680b57cec5SDimitry Andric
5690b57cec5SDimitry Andric dbgs() << "\nSuccessors:\n";
5700b57cec5SDimitry Andric for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
5710b57cec5SDimitry Andric if (S.second == SIScheduleBlockLinkKind::Data)
5720b57cec5SDimitry Andric dbgs() << "(Data Dep) ";
5730b57cec5SDimitry Andric S.first->printDebug(false);
5740b57cec5SDimitry Andric }
5750b57cec5SDimitry Andric
5760b57cec5SDimitry Andric if (Scheduled) {
5775ffd83dbSDimitry Andric dbgs() << "LiveInPressure "
5785ffd83dbSDimitry Andric << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
5795ffd83dbSDimitry Andric << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n';
5805ffd83dbSDimitry Andric dbgs() << "LiveOutPressure "
5815ffd83dbSDimitry Andric << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
5825ffd83dbSDimitry Andric << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n";
5830b57cec5SDimitry Andric dbgs() << "LiveIns:\n";
5840b57cec5SDimitry Andric for (unsigned Reg : LiveInRegs)
5850b57cec5SDimitry Andric dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
5860b57cec5SDimitry Andric
5870b57cec5SDimitry Andric dbgs() << "\nLiveOuts:\n";
5880b57cec5SDimitry Andric for (unsigned Reg : LiveOutRegs)
5890b57cec5SDimitry Andric dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
5900b57cec5SDimitry Andric }
5910b57cec5SDimitry Andric
5920b57cec5SDimitry Andric dbgs() << "\nInstructions:\n";
5930b57cec5SDimitry Andric for (const SUnit* SU : SUnits)
5940b57cec5SDimitry Andric DAG->dumpNode(*SU);
5950b57cec5SDimitry Andric
5960b57cec5SDimitry Andric dbgs() << "///////////////////////\n";
5970b57cec5SDimitry Andric }
5980b57cec5SDimitry Andric #endif
5990b57cec5SDimitry Andric
6000b57cec5SDimitry Andric // SIScheduleBlockCreator //
6010b57cec5SDimitry Andric
SIScheduleBlockCreator(SIScheduleDAGMI * DAG)602480093f4SDimitry Andric SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
603480093f4SDimitry Andric : DAG(DAG) {}
6040b57cec5SDimitry Andric
6050b57cec5SDimitry Andric SIScheduleBlocks
getBlocks(SISchedulerBlockCreatorVariant BlockVariant)6060b57cec5SDimitry Andric SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
6070b57cec5SDimitry Andric std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
6080b57cec5SDimitry Andric Blocks.find(BlockVariant);
6090b57cec5SDimitry Andric if (B == Blocks.end()) {
6100b57cec5SDimitry Andric SIScheduleBlocks Res;
6110b57cec5SDimitry Andric createBlocksForVariant(BlockVariant);
6120b57cec5SDimitry Andric topologicalSort();
6130b57cec5SDimitry Andric scheduleInsideBlocks();
6140b57cec5SDimitry Andric fillStats();
6150b57cec5SDimitry Andric Res.Blocks = CurrentBlocks;
6160b57cec5SDimitry Andric Res.TopDownIndex2Block = TopDownIndex2Block;
6170b57cec5SDimitry Andric Res.TopDownBlock2Index = TopDownBlock2Index;
6180b57cec5SDimitry Andric Blocks[BlockVariant] = Res;
6190b57cec5SDimitry Andric return Res;
6200b57cec5SDimitry Andric }
621*0fca6ea1SDimitry Andric return B->second;
6220b57cec5SDimitry Andric }
6230b57cec5SDimitry Andric
isSUInBlock(SUnit * SU,unsigned ID)6240b57cec5SDimitry Andric bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
6250b57cec5SDimitry Andric if (SU->NodeNum >= DAG->SUnits.size())
6260b57cec5SDimitry Andric return false;
6270b57cec5SDimitry Andric return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
6280b57cec5SDimitry Andric }
6290b57cec5SDimitry Andric
colorHighLatenciesAlone()6300b57cec5SDimitry Andric void SIScheduleBlockCreator::colorHighLatenciesAlone() {
6310b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size();
6320b57cec5SDimitry Andric
6330b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) {
6340b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[i];
6350b57cec5SDimitry Andric if (DAG->IsHighLatencySU[SU->NodeNum]) {
6360b57cec5SDimitry Andric CurrentColoring[SU->NodeNum] = NextReservedID++;
6370b57cec5SDimitry Andric }
6380b57cec5SDimitry Andric }
6390b57cec5SDimitry Andric }
6400b57cec5SDimitry Andric
6410b57cec5SDimitry Andric static bool
hasDataDependencyPred(const SUnit & SU,const SUnit & FromSU)6420b57cec5SDimitry Andric hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
6430b57cec5SDimitry Andric for (const auto &PredDep : SU.Preds) {
6440b57cec5SDimitry Andric if (PredDep.getSUnit() == &FromSU &&
6450b57cec5SDimitry Andric PredDep.getKind() == llvm::SDep::Data)
6460b57cec5SDimitry Andric return true;
6470b57cec5SDimitry Andric }
6480b57cec5SDimitry Andric return false;
6490b57cec5SDimitry Andric }
6500b57cec5SDimitry Andric
colorHighLatenciesGroups()6510b57cec5SDimitry Andric void SIScheduleBlockCreator::colorHighLatenciesGroups() {
6520b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size();
6530b57cec5SDimitry Andric unsigned NumHighLatencies = 0;
6540b57cec5SDimitry Andric unsigned GroupSize;
6550b57cec5SDimitry Andric int Color = NextReservedID;
6560b57cec5SDimitry Andric unsigned Count = 0;
6570b57cec5SDimitry Andric std::set<unsigned> FormingGroup;
6580b57cec5SDimitry Andric
6590b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) {
6600b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[i];
6610b57cec5SDimitry Andric if (DAG->IsHighLatencySU[SU->NodeNum])
6620b57cec5SDimitry Andric ++NumHighLatencies;
6630b57cec5SDimitry Andric }
6640b57cec5SDimitry Andric
6650b57cec5SDimitry Andric if (NumHighLatencies == 0)
6660b57cec5SDimitry Andric return;
6670b57cec5SDimitry Andric
6680b57cec5SDimitry Andric if (NumHighLatencies <= 6)
6690b57cec5SDimitry Andric GroupSize = 2;
6700b57cec5SDimitry Andric else if (NumHighLatencies <= 12)
6710b57cec5SDimitry Andric GroupSize = 3;
6720b57cec5SDimitry Andric else
6730b57cec5SDimitry Andric GroupSize = 4;
6740b57cec5SDimitry Andric
6750b57cec5SDimitry Andric for (unsigned SUNum : DAG->TopDownIndex2SU) {
6760b57cec5SDimitry Andric const SUnit &SU = DAG->SUnits[SUNum];
6770b57cec5SDimitry Andric if (DAG->IsHighLatencySU[SU.NodeNum]) {
6780b57cec5SDimitry Andric unsigned CompatibleGroup = true;
6790b57cec5SDimitry Andric int ProposedColor = Color;
6800b57cec5SDimitry Andric std::vector<int> AdditionalElements;
6810b57cec5SDimitry Andric
6820b57cec5SDimitry Andric // We don't want to put in the same block
6830b57cec5SDimitry Andric // two high latency instructions that depend
6840b57cec5SDimitry Andric // on each other.
6850b57cec5SDimitry Andric // One way would be to check canAddEdge
6860b57cec5SDimitry Andric // in both directions, but that currently is not
6870b57cec5SDimitry Andric // enough because there the high latency order is
6880b57cec5SDimitry Andric // enforced (via links).
6890b57cec5SDimitry Andric // Instead, look at the dependencies between the
6900b57cec5SDimitry Andric // high latency instructions and deduce if it is
6910b57cec5SDimitry Andric // a data dependency or not.
6920b57cec5SDimitry Andric for (unsigned j : FormingGroup) {
6930b57cec5SDimitry Andric bool HasSubGraph;
6940b57cec5SDimitry Andric std::vector<int> SubGraph;
6950b57cec5SDimitry Andric // By construction (topological order), if SU and
69681ad6265SDimitry Andric // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessary
6970b57cec5SDimitry Andric // in the parent graph of SU.
6980b57cec5SDimitry Andric #ifndef NDEBUG
6990b57cec5SDimitry Andric SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
7000b57cec5SDimitry Andric HasSubGraph);
7010b57cec5SDimitry Andric assert(!HasSubGraph);
7020b57cec5SDimitry Andric #endif
7030b57cec5SDimitry Andric SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
7040b57cec5SDimitry Andric HasSubGraph);
7050b57cec5SDimitry Andric if (!HasSubGraph)
7060b57cec5SDimitry Andric continue; // No dependencies between each other
707*0fca6ea1SDimitry Andric if (SubGraph.size() > 5) {
7080b57cec5SDimitry Andric // Too many elements would be required to be added to the block.
7090b57cec5SDimitry Andric CompatibleGroup = false;
7100b57cec5SDimitry Andric break;
7110b57cec5SDimitry Andric }
7120b57cec5SDimitry Andric // Check the type of dependency
7130b57cec5SDimitry Andric for (unsigned k : SubGraph) {
7140b57cec5SDimitry Andric // If in the path to join the two instructions,
7150b57cec5SDimitry Andric // there is another high latency instruction,
7160b57cec5SDimitry Andric // or instructions colored for another block
7170b57cec5SDimitry Andric // abort the merge.
718*0fca6ea1SDimitry Andric if (DAG->IsHighLatencySU[k] || (CurrentColoring[k] != ProposedColor &&
7190b57cec5SDimitry Andric CurrentColoring[k] != 0)) {
7200b57cec5SDimitry Andric CompatibleGroup = false;
7210b57cec5SDimitry Andric break;
7220b57cec5SDimitry Andric }
7230b57cec5SDimitry Andric // If one of the SU in the subgraph depends on the result of SU j,
7240b57cec5SDimitry Andric // there'll be a data dependency.
7250b57cec5SDimitry Andric if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
7260b57cec5SDimitry Andric CompatibleGroup = false;
7270b57cec5SDimitry Andric break;
7280b57cec5SDimitry Andric }
7290b57cec5SDimitry Andric }
7300b57cec5SDimitry Andric if (!CompatibleGroup)
7310b57cec5SDimitry Andric break;
7320b57cec5SDimitry Andric // Same check for the SU
7330b57cec5SDimitry Andric if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
7340b57cec5SDimitry Andric CompatibleGroup = false;
7350b57cec5SDimitry Andric break;
7360b57cec5SDimitry Andric }
7370b57cec5SDimitry Andric // Add all the required instructions to the block
7380b57cec5SDimitry Andric // These cannot live in another block (because they
7390b57cec5SDimitry Andric // depend (order dependency) on one of the
7400b57cec5SDimitry Andric // instruction in the block, and are required for the
7410b57cec5SDimitry Andric // high latency instruction we add.
742e8d8bef9SDimitry Andric llvm::append_range(AdditionalElements, SubGraph);
7430b57cec5SDimitry Andric }
7440b57cec5SDimitry Andric if (CompatibleGroup) {
7450b57cec5SDimitry Andric FormingGroup.insert(SU.NodeNum);
7460b57cec5SDimitry Andric for (unsigned j : AdditionalElements)
7470b57cec5SDimitry Andric CurrentColoring[j] = ProposedColor;
7480b57cec5SDimitry Andric CurrentColoring[SU.NodeNum] = ProposedColor;
7490b57cec5SDimitry Andric ++Count;
7500b57cec5SDimitry Andric }
7510b57cec5SDimitry Andric // Found one incompatible instruction,
7520b57cec5SDimitry Andric // or has filled a big enough group.
7530b57cec5SDimitry Andric // -> start a new one.
7540b57cec5SDimitry Andric if (!CompatibleGroup) {
7550b57cec5SDimitry Andric FormingGroup.clear();
7560b57cec5SDimitry Andric Color = ++NextReservedID;
7570b57cec5SDimitry Andric ProposedColor = Color;
7580b57cec5SDimitry Andric FormingGroup.insert(SU.NodeNum);
7590b57cec5SDimitry Andric CurrentColoring[SU.NodeNum] = ProposedColor;
7600b57cec5SDimitry Andric Count = 0;
7610b57cec5SDimitry Andric } else if (Count == GroupSize) {
7620b57cec5SDimitry Andric FormingGroup.clear();
7630b57cec5SDimitry Andric Color = ++NextReservedID;
7640b57cec5SDimitry Andric ProposedColor = Color;
7650b57cec5SDimitry Andric Count = 0;
7660b57cec5SDimitry Andric }
7670b57cec5SDimitry Andric }
7680b57cec5SDimitry Andric }
7690b57cec5SDimitry Andric }
7700b57cec5SDimitry Andric
colorComputeReservedDependencies()7710b57cec5SDimitry Andric void SIScheduleBlockCreator::colorComputeReservedDependencies() {
7720b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size();
7730b57cec5SDimitry Andric std::map<std::set<unsigned>, unsigned> ColorCombinations;
7740b57cec5SDimitry Andric
7750b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring.clear();
7760b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring.clear();
7770b57cec5SDimitry Andric
7780b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
7790b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
7800b57cec5SDimitry Andric
7810b57cec5SDimitry Andric // Traverse TopDown, and give different colors to SUs depending
7820b57cec5SDimitry Andric // on which combination of High Latencies they depend on.
7830b57cec5SDimitry Andric
7840b57cec5SDimitry Andric for (unsigned SUNum : DAG->TopDownIndex2SU) {
7850b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum];
7860b57cec5SDimitry Andric std::set<unsigned> SUColors;
7870b57cec5SDimitry Andric
7880b57cec5SDimitry Andric // Already given.
7890b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum]) {
7900b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
7910b57cec5SDimitry Andric CurrentColoring[SU->NodeNum];
7920b57cec5SDimitry Andric continue;
7930b57cec5SDimitry Andric }
7940b57cec5SDimitry Andric
7950b57cec5SDimitry Andric for (SDep& PredDep : SU->Preds) {
7960b57cec5SDimitry Andric SUnit *Pred = PredDep.getSUnit();
7970b57cec5SDimitry Andric if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
7980b57cec5SDimitry Andric continue;
7990b57cec5SDimitry Andric if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
8000b57cec5SDimitry Andric SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
8010b57cec5SDimitry Andric }
8020b57cec5SDimitry Andric // Color 0 by default.
8030b57cec5SDimitry Andric if (SUColors.empty())
8040b57cec5SDimitry Andric continue;
8050b57cec5SDimitry Andric // Same color than parents.
8060b57cec5SDimitry Andric if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
8070b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
8080b57cec5SDimitry Andric *SUColors.begin();
8090b57cec5SDimitry Andric else {
8100b57cec5SDimitry Andric std::map<std::set<unsigned>, unsigned>::iterator Pos =
8110b57cec5SDimitry Andric ColorCombinations.find(SUColors);
8120b57cec5SDimitry Andric if (Pos != ColorCombinations.end()) {
8130b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
8140b57cec5SDimitry Andric } else {
8150b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
8160b57cec5SDimitry Andric NextNonReservedID;
8170b57cec5SDimitry Andric ColorCombinations[SUColors] = NextNonReservedID++;
8180b57cec5SDimitry Andric }
8190b57cec5SDimitry Andric }
8200b57cec5SDimitry Andric }
8210b57cec5SDimitry Andric
8220b57cec5SDimitry Andric ColorCombinations.clear();
8230b57cec5SDimitry Andric
8240b57cec5SDimitry Andric // Same as before, but BottomUp.
8250b57cec5SDimitry Andric
8260b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) {
8270b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum];
8280b57cec5SDimitry Andric std::set<unsigned> SUColors;
8290b57cec5SDimitry Andric
8300b57cec5SDimitry Andric // Already given.
8310b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum]) {
8320b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
8330b57cec5SDimitry Andric CurrentColoring[SU->NodeNum];
8340b57cec5SDimitry Andric continue;
8350b57cec5SDimitry Andric }
8360b57cec5SDimitry Andric
8370b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) {
8380b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit();
8390b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
8400b57cec5SDimitry Andric continue;
8410b57cec5SDimitry Andric if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
8420b57cec5SDimitry Andric SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
8430b57cec5SDimitry Andric }
8440b57cec5SDimitry Andric // Keep color 0.
8450b57cec5SDimitry Andric if (SUColors.empty())
8460b57cec5SDimitry Andric continue;
8470b57cec5SDimitry Andric // Same color than parents.
8480b57cec5SDimitry Andric if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
8490b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
8500b57cec5SDimitry Andric *SUColors.begin();
8510b57cec5SDimitry Andric else {
8520b57cec5SDimitry Andric std::map<std::set<unsigned>, unsigned>::iterator Pos =
8530b57cec5SDimitry Andric ColorCombinations.find(SUColors);
8540b57cec5SDimitry Andric if (Pos != ColorCombinations.end()) {
8550b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
8560b57cec5SDimitry Andric } else {
8570b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
8580b57cec5SDimitry Andric NextNonReservedID;
8590b57cec5SDimitry Andric ColorCombinations[SUColors] = NextNonReservedID++;
8600b57cec5SDimitry Andric }
8610b57cec5SDimitry Andric }
8620b57cec5SDimitry Andric }
8630b57cec5SDimitry Andric }
8640b57cec5SDimitry Andric
colorAccordingToReservedDependencies()8650b57cec5SDimitry Andric void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
8660b57cec5SDimitry Andric std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;
8670b57cec5SDimitry Andric
8680b57cec5SDimitry Andric // Every combination of colors given by the top down
8690b57cec5SDimitry Andric // and bottom up Reserved node dependency
8700b57cec5SDimitry Andric
8710eae32dcSDimitry Andric for (const SUnit &SU : DAG->SUnits) {
8720b57cec5SDimitry Andric std::pair<unsigned, unsigned> SUColors;
8730b57cec5SDimitry Andric
8740b57cec5SDimitry Andric // High latency instructions: already given.
8750eae32dcSDimitry Andric if (CurrentColoring[SU.NodeNum])
8760b57cec5SDimitry Andric continue;
8770b57cec5SDimitry Andric
8780eae32dcSDimitry Andric SUColors.first = CurrentTopDownReservedDependencyColoring[SU.NodeNum];
8790eae32dcSDimitry Andric SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.NodeNum];
8800b57cec5SDimitry Andric
8810b57cec5SDimitry Andric std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
8820b57cec5SDimitry Andric ColorCombinations.find(SUColors);
8830b57cec5SDimitry Andric if (Pos != ColorCombinations.end()) {
8840eae32dcSDimitry Andric CurrentColoring[SU.NodeNum] = Pos->second;
8850b57cec5SDimitry Andric } else {
8860eae32dcSDimitry Andric CurrentColoring[SU.NodeNum] = NextNonReservedID;
8870b57cec5SDimitry Andric ColorCombinations[SUColors] = NextNonReservedID++;
8880b57cec5SDimitry Andric }
8890b57cec5SDimitry Andric }
8900b57cec5SDimitry Andric }
8910b57cec5SDimitry Andric
colorEndsAccordingToDependencies()8920b57cec5SDimitry Andric void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
8930b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size();
8940b57cec5SDimitry Andric std::vector<int> PendingColoring = CurrentColoring;
8950b57cec5SDimitry Andric
8960b57cec5SDimitry Andric assert(DAGSize >= 1 &&
8970b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
8980b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring.size() == DAGSize);
8990b57cec5SDimitry Andric // If there is no reserved block at all, do nothing. We don't want
9000b57cec5SDimitry Andric // everything in one block.
901*0fca6ea1SDimitry Andric if (*llvm::max_element(CurrentBottomUpReservedDependencyColoring) == 0 &&
902*0fca6ea1SDimitry Andric *llvm::max_element(CurrentTopDownReservedDependencyColoring) == 0)
9030b57cec5SDimitry Andric return;
9040b57cec5SDimitry Andric
9050b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) {
9060b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum];
9070b57cec5SDimitry Andric std::set<unsigned> SUColors;
9080b57cec5SDimitry Andric std::set<unsigned> SUColorsPending;
9090b57cec5SDimitry Andric
9100b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
9110b57cec5SDimitry Andric continue;
9120b57cec5SDimitry Andric
9130b57cec5SDimitry Andric if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||
9140b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
9150b57cec5SDimitry Andric continue;
9160b57cec5SDimitry Andric
9170b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) {
9180b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit();
9190b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
9200b57cec5SDimitry Andric continue;
9210b57cec5SDimitry Andric if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||
9220b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
9230b57cec5SDimitry Andric SUColors.insert(CurrentColoring[Succ->NodeNum]);
9240b57cec5SDimitry Andric SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
9250b57cec5SDimitry Andric }
9260b57cec5SDimitry Andric // If there is only one child/parent block, and that block
9270b57cec5SDimitry Andric // is not among the ones we are removing in this path, then
9280b57cec5SDimitry Andric // merge the instruction to that block
9290b57cec5SDimitry Andric if (SUColors.size() == 1 && SUColorsPending.size() == 1)
9300b57cec5SDimitry Andric PendingColoring[SU->NodeNum] = *SUColors.begin();
9310b57cec5SDimitry Andric else // TODO: Attribute new colors depending on color
9320b57cec5SDimitry Andric // combination of children.
9330b57cec5SDimitry Andric PendingColoring[SU->NodeNum] = NextNonReservedID++;
9340b57cec5SDimitry Andric }
9350b57cec5SDimitry Andric CurrentColoring = PendingColoring;
9360b57cec5SDimitry Andric }
9370b57cec5SDimitry Andric
9380b57cec5SDimitry Andric
colorForceConsecutiveOrderInGroup()9390b57cec5SDimitry Andric void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
9400b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size();
9410b57cec5SDimitry Andric unsigned PreviousColor;
9420b57cec5SDimitry Andric std::set<unsigned> SeenColors;
9430b57cec5SDimitry Andric
9440b57cec5SDimitry Andric if (DAGSize <= 1)
9450b57cec5SDimitry Andric return;
9460b57cec5SDimitry Andric
9470b57cec5SDimitry Andric PreviousColor = CurrentColoring[0];
9480b57cec5SDimitry Andric
9490b57cec5SDimitry Andric for (unsigned i = 1, e = DAGSize; i != e; ++i) {
9500b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[i];
9510b57cec5SDimitry Andric unsigned CurrentColor = CurrentColoring[i];
9520b57cec5SDimitry Andric unsigned PreviousColorSave = PreviousColor;
9530b57cec5SDimitry Andric assert(i == SU->NodeNum);
9540b57cec5SDimitry Andric
9550b57cec5SDimitry Andric if (CurrentColor != PreviousColor)
9560b57cec5SDimitry Andric SeenColors.insert(PreviousColor);
9570b57cec5SDimitry Andric PreviousColor = CurrentColor;
9580b57cec5SDimitry Andric
9590b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
9600b57cec5SDimitry Andric continue;
9610b57cec5SDimitry Andric
9620b57cec5SDimitry Andric if (SeenColors.find(CurrentColor) == SeenColors.end())
9630b57cec5SDimitry Andric continue;
9640b57cec5SDimitry Andric
9650b57cec5SDimitry Andric if (PreviousColorSave != CurrentColor)
9660b57cec5SDimitry Andric CurrentColoring[i] = NextNonReservedID++;
9670b57cec5SDimitry Andric else
9680b57cec5SDimitry Andric CurrentColoring[i] = CurrentColoring[i-1];
9690b57cec5SDimitry Andric }
9700b57cec5SDimitry Andric }
9710b57cec5SDimitry Andric
colorMergeConstantLoadsNextGroup()9720b57cec5SDimitry Andric void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
9730b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size();
9740b57cec5SDimitry Andric
9750b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) {
9760b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum];
9770b57cec5SDimitry Andric std::set<unsigned> SUColors;
9780b57cec5SDimitry Andric
9790b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
9800b57cec5SDimitry Andric continue;
9810b57cec5SDimitry Andric
9820b57cec5SDimitry Andric // No predecessor: Vgpr constant loading.
9830b57cec5SDimitry Andric // Low latency instructions usually have a predecessor (the address)
9840b57cec5SDimitry Andric if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
9850b57cec5SDimitry Andric continue;
9860b57cec5SDimitry Andric
9870b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) {
9880b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit();
9890b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
9900b57cec5SDimitry Andric continue;
9910b57cec5SDimitry Andric SUColors.insert(CurrentColoring[Succ->NodeNum]);
9920b57cec5SDimitry Andric }
9930b57cec5SDimitry Andric if (SUColors.size() == 1)
9940b57cec5SDimitry Andric CurrentColoring[SU->NodeNum] = *SUColors.begin();
9950b57cec5SDimitry Andric }
9960b57cec5SDimitry Andric }
9970b57cec5SDimitry Andric
colorMergeIfPossibleNextGroup()9980b57cec5SDimitry Andric void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
9990b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size();
10000b57cec5SDimitry Andric
10010b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) {
10020b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum];
10030b57cec5SDimitry Andric std::set<unsigned> SUColors;
10040b57cec5SDimitry Andric
10050b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
10060b57cec5SDimitry Andric continue;
10070b57cec5SDimitry Andric
10080b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) {
10090b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit();
10100b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
10110b57cec5SDimitry Andric continue;
10120b57cec5SDimitry Andric SUColors.insert(CurrentColoring[Succ->NodeNum]);
10130b57cec5SDimitry Andric }
10140b57cec5SDimitry Andric if (SUColors.size() == 1)
10150b57cec5SDimitry Andric CurrentColoring[SU->NodeNum] = *SUColors.begin();
10160b57cec5SDimitry Andric }
10170b57cec5SDimitry Andric }
10180b57cec5SDimitry Andric
colorMergeIfPossibleNextGroupOnlyForReserved()10190b57cec5SDimitry Andric void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
10200b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size();
10210b57cec5SDimitry Andric
10220b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) {
10230b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum];
10240b57cec5SDimitry Andric std::set<unsigned> SUColors;
10250b57cec5SDimitry Andric
10260b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
10270b57cec5SDimitry Andric continue;
10280b57cec5SDimitry Andric
10290b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) {
10300b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit();
10310b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
10320b57cec5SDimitry Andric continue;
10330b57cec5SDimitry Andric SUColors.insert(CurrentColoring[Succ->NodeNum]);
10340b57cec5SDimitry Andric }
10350b57cec5SDimitry Andric if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
10360b57cec5SDimitry Andric CurrentColoring[SU->NodeNum] = *SUColors.begin();
10370b57cec5SDimitry Andric }
10380b57cec5SDimitry Andric }
10390b57cec5SDimitry Andric
colorMergeIfPossibleSmallGroupsToNextGroup()10400b57cec5SDimitry Andric void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
10410b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size();
10420b57cec5SDimitry Andric std::map<unsigned, unsigned> ColorCount;
10430b57cec5SDimitry Andric
10440b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) {
10450b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum];
10460b57cec5SDimitry Andric unsigned color = CurrentColoring[SU->NodeNum];
10470b57cec5SDimitry Andric ++ColorCount[color];
10480b57cec5SDimitry Andric }
10490b57cec5SDimitry Andric
10500b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) {
10510b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum];
10520b57cec5SDimitry Andric unsigned color = CurrentColoring[SU->NodeNum];
10530b57cec5SDimitry Andric std::set<unsigned> SUColors;
10540b57cec5SDimitry Andric
10550b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
10560b57cec5SDimitry Andric continue;
10570b57cec5SDimitry Andric
10580b57cec5SDimitry Andric if (ColorCount[color] > 1)
10590b57cec5SDimitry Andric continue;
10600b57cec5SDimitry Andric
10610b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) {
10620b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit();
10630b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
10640b57cec5SDimitry Andric continue;
10650b57cec5SDimitry Andric SUColors.insert(CurrentColoring[Succ->NodeNum]);
10660b57cec5SDimitry Andric }
10670b57cec5SDimitry Andric if (SUColors.size() == 1 && *SUColors.begin() != color) {
10680b57cec5SDimitry Andric --ColorCount[color];
10690b57cec5SDimitry Andric CurrentColoring[SU->NodeNum] = *SUColors.begin();
10700b57cec5SDimitry Andric ++ColorCount[*SUColors.begin()];
10710b57cec5SDimitry Andric }
10720b57cec5SDimitry Andric }
10730b57cec5SDimitry Andric }
10740b57cec5SDimitry Andric
cutHugeBlocks()10750b57cec5SDimitry Andric void SIScheduleBlockCreator::cutHugeBlocks() {
10760b57cec5SDimitry Andric // TODO
10770b57cec5SDimitry Andric }
10780b57cec5SDimitry Andric
regroupNoUserInstructions()10790b57cec5SDimitry Andric void SIScheduleBlockCreator::regroupNoUserInstructions() {
10800b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size();
10810b57cec5SDimitry Andric int GroupID = NextNonReservedID++;
10820b57cec5SDimitry Andric
10830b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) {
10840b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum];
10850b57cec5SDimitry Andric bool hasSuccessor = false;
10860b57cec5SDimitry Andric
10870b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
10880b57cec5SDimitry Andric continue;
10890b57cec5SDimitry Andric
10900b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) {
10910b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit();
10920b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
10930b57cec5SDimitry Andric continue;
10940b57cec5SDimitry Andric hasSuccessor = true;
10950b57cec5SDimitry Andric }
10960b57cec5SDimitry Andric if (!hasSuccessor)
10970b57cec5SDimitry Andric CurrentColoring[SU->NodeNum] = GroupID;
10980b57cec5SDimitry Andric }
10990b57cec5SDimitry Andric }
11000b57cec5SDimitry Andric
colorExports()11010b57cec5SDimitry Andric void SIScheduleBlockCreator::colorExports() {
11020b57cec5SDimitry Andric unsigned ExportColor = NextNonReservedID++;
11030b57cec5SDimitry Andric SmallVector<unsigned, 8> ExpGroup;
11040b57cec5SDimitry Andric
11050b57cec5SDimitry Andric // Put all exports together in a block.
11060b57cec5SDimitry Andric // The block will naturally end up being scheduled last,
11070b57cec5SDimitry Andric // thus putting exports at the end of the schedule, which
11080b57cec5SDimitry Andric // is better for performance.
11090b57cec5SDimitry Andric // However we must ensure, for safety, the exports can be put
11100b57cec5SDimitry Andric // together in the same block without any other instruction.
11110b57cec5SDimitry Andric // This could happen, for example, when scheduling after regalloc
11120b57cec5SDimitry Andric // if reloading a spilled register from memory using the same
11130b57cec5SDimitry Andric // register than used in a previous export.
11140b57cec5SDimitry Andric // If that happens, do not regroup the exports.
11150b57cec5SDimitry Andric for (unsigned SUNum : DAG->TopDownIndex2SU) {
11160b57cec5SDimitry Andric const SUnit &SU = DAG->SUnits[SUNum];
11170b57cec5SDimitry Andric if (SIInstrInfo::isEXP(*SU.getInstr())) {
111881ad6265SDimitry Andric // SU is an export instruction. Check whether one of its successor
111981ad6265SDimitry Andric // dependencies is a non-export, in which case we skip export grouping.
112081ad6265SDimitry Andric for (const SDep &SuccDep : SU.Succs) {
112181ad6265SDimitry Andric const SUnit *SuccSU = SuccDep.getSUnit();
112281ad6265SDimitry Andric if (SuccDep.isWeak() || SuccSU->NodeNum >= DAG->SUnits.size()) {
112381ad6265SDimitry Andric // Ignore these dependencies.
112481ad6265SDimitry Andric continue;
112581ad6265SDimitry Andric }
112681ad6265SDimitry Andric assert(SuccSU->isInstr() &&
112781ad6265SDimitry Andric "SUnit unexpectedly not representing an instruction!");
11280b57cec5SDimitry Andric
112981ad6265SDimitry Andric if (!SIInstrInfo::isEXP(*SuccSU->getInstr())) {
113081ad6265SDimitry Andric // A non-export depends on us. Skip export grouping.
113181ad6265SDimitry Andric // Note that this is a bit pessimistic: We could still group all other
113281ad6265SDimitry Andric // exports that are not depended on by non-exports, directly or
113381ad6265SDimitry Andric // indirectly. Simply skipping this particular export but grouping all
113481ad6265SDimitry Andric // others would not account for indirect dependencies.
11350b57cec5SDimitry Andric return;
11360b57cec5SDimitry Andric }
11370b57cec5SDimitry Andric }
11380b57cec5SDimitry Andric ExpGroup.push_back(SUNum);
11390b57cec5SDimitry Andric }
11400b57cec5SDimitry Andric }
11410b57cec5SDimitry Andric
11420b57cec5SDimitry Andric // The group can be formed. Give the color.
11430b57cec5SDimitry Andric for (unsigned j : ExpGroup)
11440b57cec5SDimitry Andric CurrentColoring[j] = ExportColor;
11450b57cec5SDimitry Andric }
11460b57cec5SDimitry Andric
createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant)11470b57cec5SDimitry Andric void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
11480b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size();
11490b57cec5SDimitry Andric std::map<unsigned,unsigned> RealID;
11500b57cec5SDimitry Andric
11510b57cec5SDimitry Andric CurrentBlocks.clear();
11520b57cec5SDimitry Andric CurrentColoring.clear();
11530b57cec5SDimitry Andric CurrentColoring.resize(DAGSize, 0);
11540b57cec5SDimitry Andric Node2CurrentBlock.clear();
11550b57cec5SDimitry Andric
11560b57cec5SDimitry Andric // Restore links previous scheduling variant has overridden.
11570b57cec5SDimitry Andric DAG->restoreSULinksLeft();
11580b57cec5SDimitry Andric
11590b57cec5SDimitry Andric NextReservedID = 1;
11600b57cec5SDimitry Andric NextNonReservedID = DAGSize + 1;
11610b57cec5SDimitry Andric
11620b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Coloring the graph\n");
11630b57cec5SDimitry Andric
11640b57cec5SDimitry Andric if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
11650b57cec5SDimitry Andric colorHighLatenciesGroups();
11660b57cec5SDimitry Andric else
11670b57cec5SDimitry Andric colorHighLatenciesAlone();
11680b57cec5SDimitry Andric colorComputeReservedDependencies();
11690b57cec5SDimitry Andric colorAccordingToReservedDependencies();
11700b57cec5SDimitry Andric colorEndsAccordingToDependencies();
11710b57cec5SDimitry Andric if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
11720b57cec5SDimitry Andric colorForceConsecutiveOrderInGroup();
11730b57cec5SDimitry Andric regroupNoUserInstructions();
11740b57cec5SDimitry Andric colorMergeConstantLoadsNextGroup();
11750b57cec5SDimitry Andric colorMergeIfPossibleNextGroupOnlyForReserved();
11760b57cec5SDimitry Andric colorExports();
11770b57cec5SDimitry Andric
11780b57cec5SDimitry Andric // Put SUs of same color into same block
11790b57cec5SDimitry Andric Node2CurrentBlock.resize(DAGSize, -1);
11800b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) {
11810b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[i];
11820b57cec5SDimitry Andric unsigned Color = CurrentColoring[SU->NodeNum];
11830b57cec5SDimitry Andric if (RealID.find(Color) == RealID.end()) {
11840b57cec5SDimitry Andric int ID = CurrentBlocks.size();
11858bcb0991SDimitry Andric BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG, this, ID));
11860b57cec5SDimitry Andric CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
11870b57cec5SDimitry Andric RealID[Color] = ID;
11880b57cec5SDimitry Andric }
11890b57cec5SDimitry Andric CurrentBlocks[RealID[Color]]->addUnit(SU);
11900b57cec5SDimitry Andric Node2CurrentBlock[SU->NodeNum] = RealID[Color];
11910b57cec5SDimitry Andric }
11920b57cec5SDimitry Andric
11930b57cec5SDimitry Andric // Build dependencies between blocks.
11940b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) {
11950b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[i];
11960b57cec5SDimitry Andric int SUID = Node2CurrentBlock[i];
11970b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) {
11980b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit();
11990b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
12000b57cec5SDimitry Andric continue;
12010b57cec5SDimitry Andric if (Node2CurrentBlock[Succ->NodeNum] != SUID)
12020b57cec5SDimitry Andric CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
12030b57cec5SDimitry Andric SuccDep.isCtrl() ? NoData : Data);
12040b57cec5SDimitry Andric }
12050b57cec5SDimitry Andric for (SDep& PredDep : SU->Preds) {
12060b57cec5SDimitry Andric SUnit *Pred = PredDep.getSUnit();
12070b57cec5SDimitry Andric if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
12080b57cec5SDimitry Andric continue;
12090b57cec5SDimitry Andric if (Node2CurrentBlock[Pred->NodeNum] != SUID)
12100b57cec5SDimitry Andric CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
12110b57cec5SDimitry Andric }
12120b57cec5SDimitry Andric }
12130b57cec5SDimitry Andric
12140b57cec5SDimitry Andric // Free root and leafs of all blocks to enable scheduling inside them.
12150eae32dcSDimitry Andric for (SIScheduleBlock *Block : CurrentBlocks)
12160b57cec5SDimitry Andric Block->finalizeUnits();
12170eae32dcSDimitry Andric LLVM_DEBUG({
12180eae32dcSDimitry Andric dbgs() << "Blocks created:\n\n";
12190eae32dcSDimitry Andric for (SIScheduleBlock *Block : CurrentBlocks)
12200b57cec5SDimitry Andric Block->printDebug(true);
12210b57cec5SDimitry Andric });
12220b57cec5SDimitry Andric }
12230b57cec5SDimitry Andric
12240b57cec5SDimitry Andric // Two functions taken from Codegen/MachineScheduler.cpp
12250b57cec5SDimitry Andric
12260b57cec5SDimitry Andric /// Non-const version.
12270b57cec5SDimitry Andric static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,MachineBasicBlock::const_iterator End)12280b57cec5SDimitry Andric nextIfDebug(MachineBasicBlock::iterator I,
12290b57cec5SDimitry Andric MachineBasicBlock::const_iterator End) {
12300b57cec5SDimitry Andric for (; I != End; ++I) {
12310b57cec5SDimitry Andric if (!I->isDebugInstr())
12320b57cec5SDimitry Andric break;
12330b57cec5SDimitry Andric }
12340b57cec5SDimitry Andric return I;
12350b57cec5SDimitry Andric }
12360b57cec5SDimitry Andric
topologicalSort()12370b57cec5SDimitry Andric void SIScheduleBlockCreator::topologicalSort() {
12380b57cec5SDimitry Andric unsigned DAGSize = CurrentBlocks.size();
12390b57cec5SDimitry Andric std::vector<int> WorkList;
12400b57cec5SDimitry Andric
12410b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Topological Sort\n");
12420b57cec5SDimitry Andric
12430b57cec5SDimitry Andric WorkList.reserve(DAGSize);
12440b57cec5SDimitry Andric TopDownIndex2Block.resize(DAGSize);
12450b57cec5SDimitry Andric TopDownBlock2Index.resize(DAGSize);
12460b57cec5SDimitry Andric BottomUpIndex2Block.resize(DAGSize);
12470b57cec5SDimitry Andric
12480b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) {
12490b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[i];
12500b57cec5SDimitry Andric unsigned Degree = Block->getSuccs().size();
12510b57cec5SDimitry Andric TopDownBlock2Index[i] = Degree;
12520b57cec5SDimitry Andric if (Degree == 0) {
12530b57cec5SDimitry Andric WorkList.push_back(i);
12540b57cec5SDimitry Andric }
12550b57cec5SDimitry Andric }
12560b57cec5SDimitry Andric
12570b57cec5SDimitry Andric int Id = DAGSize;
12580b57cec5SDimitry Andric while (!WorkList.empty()) {
12590b57cec5SDimitry Andric int i = WorkList.back();
12600b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[i];
12610b57cec5SDimitry Andric WorkList.pop_back();
12620b57cec5SDimitry Andric TopDownBlock2Index[i] = --Id;
12630b57cec5SDimitry Andric TopDownIndex2Block[Id] = i;
12640b57cec5SDimitry Andric for (SIScheduleBlock* Pred : Block->getPreds()) {
12650b57cec5SDimitry Andric if (!--TopDownBlock2Index[Pred->getID()])
12660b57cec5SDimitry Andric WorkList.push_back(Pred->getID());
12670b57cec5SDimitry Andric }
12680b57cec5SDimitry Andric }
12690b57cec5SDimitry Andric
12700b57cec5SDimitry Andric #ifndef NDEBUG
12710b57cec5SDimitry Andric // Check correctness of the ordering.
12720b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) {
12730b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[i];
12740b57cec5SDimitry Andric for (SIScheduleBlock* Pred : Block->getPreds()) {
12750b57cec5SDimitry Andric assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
12760b57cec5SDimitry Andric "Wrong Top Down topological sorting");
12770b57cec5SDimitry Andric }
12780b57cec5SDimitry Andric }
12790b57cec5SDimitry Andric #endif
12800b57cec5SDimitry Andric
12810b57cec5SDimitry Andric BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
12820b57cec5SDimitry Andric TopDownIndex2Block.rend());
12830b57cec5SDimitry Andric }
12840b57cec5SDimitry Andric
scheduleInsideBlocks()12850b57cec5SDimitry Andric void SIScheduleBlockCreator::scheduleInsideBlocks() {
12860b57cec5SDimitry Andric unsigned DAGSize = CurrentBlocks.size();
12870b57cec5SDimitry Andric
12880b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\nScheduling Blocks\n\n");
12890b57cec5SDimitry Andric
12900b57cec5SDimitry Andric // We do schedule a valid scheduling such that a Block corresponds
12910b57cec5SDimitry Andric // to a range of instructions.
12920b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n");
12930b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) {
12940b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[i];
12950b57cec5SDimitry Andric Block->fastSchedule();
12960b57cec5SDimitry Andric }
12970b57cec5SDimitry Andric
12980b57cec5SDimitry Andric // Note: the following code, and the part restoring previous position
12990b57cec5SDimitry Andric // is by far the most expensive operation of the Scheduler.
13000b57cec5SDimitry Andric
13010b57cec5SDimitry Andric // Do not update CurrentTop.
13020b57cec5SDimitry Andric MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
13030b57cec5SDimitry Andric std::vector<MachineBasicBlock::iterator> PosOld;
13040b57cec5SDimitry Andric std::vector<MachineBasicBlock::iterator> PosNew;
13050b57cec5SDimitry Andric PosOld.reserve(DAG->SUnits.size());
13060b57cec5SDimitry Andric PosNew.reserve(DAG->SUnits.size());
13070b57cec5SDimitry Andric
13080b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) {
13090b57cec5SDimitry Andric int BlockIndice = TopDownIndex2Block[i];
13100b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
13110b57cec5SDimitry Andric std::vector<SUnit*> SUs = Block->getScheduledUnits();
13120b57cec5SDimitry Andric
13130b57cec5SDimitry Andric for (SUnit* SU : SUs) {
13140b57cec5SDimitry Andric MachineInstr *MI = SU->getInstr();
13150b57cec5SDimitry Andric MachineBasicBlock::iterator Pos = MI;
13160b57cec5SDimitry Andric PosOld.push_back(Pos);
13170b57cec5SDimitry Andric if (&*CurrentTopFastSched == MI) {
13180b57cec5SDimitry Andric PosNew.push_back(Pos);
13190b57cec5SDimitry Andric CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
13200b57cec5SDimitry Andric DAG->getCurrentBottom());
13210b57cec5SDimitry Andric } else {
13220b57cec5SDimitry Andric // Update the instruction stream.
13230b57cec5SDimitry Andric DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
13240b57cec5SDimitry Andric
13250b57cec5SDimitry Andric // Update LiveIntervals.
13260b57cec5SDimitry Andric // Note: Moving all instructions and calling handleMove every time
13270b57cec5SDimitry Andric // is the most cpu intensive operation of the scheduler.
13280b57cec5SDimitry Andric // It would gain a lot if there was a way to recompute the
13290b57cec5SDimitry Andric // LiveIntervals for the entire scheduling region.
13300b57cec5SDimitry Andric DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true);
13310b57cec5SDimitry Andric PosNew.push_back(CurrentTopFastSched);
13320b57cec5SDimitry Andric }
13330b57cec5SDimitry Andric }
13340b57cec5SDimitry Andric }
13350b57cec5SDimitry Andric
13360b57cec5SDimitry Andric // Now we have Block of SUs == Block of MI.
13370b57cec5SDimitry Andric // We do the final schedule for the instructions inside the block.
13380b57cec5SDimitry Andric // The property that all the SUs of the Block are grouped together as MI
13390b57cec5SDimitry Andric // is used for correct reg usage tracking.
13400b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) {
13410b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[i];
13420b57cec5SDimitry Andric std::vector<SUnit*> SUs = Block->getScheduledUnits();
13430b57cec5SDimitry Andric Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
13440b57cec5SDimitry Andric }
13450b57cec5SDimitry Andric
13460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Restoring MI Pos\n");
13470b57cec5SDimitry Andric // Restore old ordering (which prevents a LIS->handleMove bug).
13480b57cec5SDimitry Andric for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
13490b57cec5SDimitry Andric MachineBasicBlock::iterator POld = PosOld[i-1];
13500b57cec5SDimitry Andric MachineBasicBlock::iterator PNew = PosNew[i-1];
13510b57cec5SDimitry Andric if (PNew != POld) {
13520b57cec5SDimitry Andric // Update the instruction stream.
13530b57cec5SDimitry Andric DAG->getBB()->splice(POld, DAG->getBB(), PNew);
13540b57cec5SDimitry Andric
13550b57cec5SDimitry Andric // Update LiveIntervals.
13560b57cec5SDimitry Andric DAG->getLIS()->handleMove(*POld, /*UpdateFlags=*/true);
13570b57cec5SDimitry Andric }
13580b57cec5SDimitry Andric }
13590b57cec5SDimitry Andric
13600eae32dcSDimitry Andric LLVM_DEBUG({
13610eae32dcSDimitry Andric for (SIScheduleBlock *Block : CurrentBlocks)
13620b57cec5SDimitry Andric Block->printDebug(true);
13630b57cec5SDimitry Andric });
13640b57cec5SDimitry Andric }
13650b57cec5SDimitry Andric
fillStats()13660b57cec5SDimitry Andric void SIScheduleBlockCreator::fillStats() {
13670b57cec5SDimitry Andric unsigned DAGSize = CurrentBlocks.size();
13680b57cec5SDimitry Andric
13690b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) {
13700b57cec5SDimitry Andric int BlockIndice = TopDownIndex2Block[i];
13710b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
13720b57cec5SDimitry Andric if (Block->getPreds().empty())
13730b57cec5SDimitry Andric Block->Depth = 0;
13740b57cec5SDimitry Andric else {
13750b57cec5SDimitry Andric unsigned Depth = 0;
13760b57cec5SDimitry Andric for (SIScheduleBlock *Pred : Block->getPreds()) {
13770b57cec5SDimitry Andric if (Depth < Pred->Depth + Pred->getCost())
13780b57cec5SDimitry Andric Depth = Pred->Depth + Pred->getCost();
13790b57cec5SDimitry Andric }
13800b57cec5SDimitry Andric Block->Depth = Depth;
13810b57cec5SDimitry Andric }
13820b57cec5SDimitry Andric }
13830b57cec5SDimitry Andric
13840b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) {
13850b57cec5SDimitry Andric int BlockIndice = BottomUpIndex2Block[i];
13860b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
13870b57cec5SDimitry Andric if (Block->getSuccs().empty())
13880b57cec5SDimitry Andric Block->Height = 0;
13890b57cec5SDimitry Andric else {
13900b57cec5SDimitry Andric unsigned Height = 0;
13910b57cec5SDimitry Andric for (const auto &Succ : Block->getSuccs())
13920b57cec5SDimitry Andric Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
13930b57cec5SDimitry Andric Block->Height = Height;
13940b57cec5SDimitry Andric }
13950b57cec5SDimitry Andric }
13960b57cec5SDimitry Andric }
13970b57cec5SDimitry Andric
13980b57cec5SDimitry Andric // SIScheduleBlockScheduler //
13990b57cec5SDimitry Andric
SIScheduleBlockScheduler(SIScheduleDAGMI * DAG,SISchedulerBlockSchedulerVariant Variant,SIScheduleBlocks BlocksStruct)14000b57cec5SDimitry Andric SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
14010b57cec5SDimitry Andric SISchedulerBlockSchedulerVariant Variant,
14020b57cec5SDimitry Andric SIScheduleBlocks BlocksStruct) :
14030b57cec5SDimitry Andric DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
14040b57cec5SDimitry Andric LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
14050b57cec5SDimitry Andric SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
14060b57cec5SDimitry Andric
14070b57cec5SDimitry Andric // Fill the usage of every output
14080b57cec5SDimitry Andric // Warning: while by construction we always have a link between two blocks
14090b57cec5SDimitry Andric // when one needs a result from the other, the number of users of an output
14100b57cec5SDimitry Andric // is not the sum of child blocks having as input the same virtual register.
14110b57cec5SDimitry Andric // Here is an example. A produces x and y. B eats x and produces x'.
14120b57cec5SDimitry Andric // C eats x' and y. The register coalescer may have attributed the same
14130b57cec5SDimitry Andric // virtual register to x and x'.
14140b57cec5SDimitry Andric // To count accurately, we do a topological sort. In case the register is
14150b57cec5SDimitry Andric // found for several parents, we increment the usage of the one with the
14160b57cec5SDimitry Andric // highest topological index.
14170b57cec5SDimitry Andric LiveOutRegsNumUsages.resize(Blocks.size());
14180eae32dcSDimitry Andric for (SIScheduleBlock *Block : Blocks) {
14190b57cec5SDimitry Andric for (unsigned Reg : Block->getInRegs()) {
14200b57cec5SDimitry Andric bool Found = false;
14210b57cec5SDimitry Andric int topoInd = -1;
14220b57cec5SDimitry Andric for (SIScheduleBlock* Pred: Block->getPreds()) {
14230b57cec5SDimitry Andric std::set<unsigned> PredOutRegs = Pred->getOutRegs();
14240b57cec5SDimitry Andric std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
14250b57cec5SDimitry Andric
14260b57cec5SDimitry Andric if (RegPos != PredOutRegs.end()) {
14270b57cec5SDimitry Andric Found = true;
14280b57cec5SDimitry Andric if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
14290b57cec5SDimitry Andric topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
14300b57cec5SDimitry Andric }
14310b57cec5SDimitry Andric }
14320b57cec5SDimitry Andric }
14330b57cec5SDimitry Andric
14340b57cec5SDimitry Andric if (!Found)
14350b57cec5SDimitry Andric continue;
14360b57cec5SDimitry Andric
14370b57cec5SDimitry Andric int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
14380b57cec5SDimitry Andric ++LiveOutRegsNumUsages[PredID][Reg];
14390b57cec5SDimitry Andric }
14400b57cec5SDimitry Andric }
14410b57cec5SDimitry Andric
14420b57cec5SDimitry Andric LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
14430b57cec5SDimitry Andric BlockNumPredsLeft.resize(Blocks.size());
14440b57cec5SDimitry Andric BlockNumSuccsLeft.resize(Blocks.size());
14450b57cec5SDimitry Andric
14460b57cec5SDimitry Andric for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
14470b57cec5SDimitry Andric SIScheduleBlock *Block = Blocks[i];
14480b57cec5SDimitry Andric BlockNumPredsLeft[i] = Block->getPreds().size();
14490b57cec5SDimitry Andric BlockNumSuccsLeft[i] = Block->getSuccs().size();
14500b57cec5SDimitry Andric }
14510b57cec5SDimitry Andric
14520b57cec5SDimitry Andric #ifndef NDEBUG
14530b57cec5SDimitry Andric for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
14540b57cec5SDimitry Andric SIScheduleBlock *Block = Blocks[i];
14550b57cec5SDimitry Andric assert(Block->getID() == i);
14560b57cec5SDimitry Andric }
14570b57cec5SDimitry Andric #endif
14580b57cec5SDimitry Andric
14590b57cec5SDimitry Andric std::set<unsigned> InRegs = DAG->getInRegs();
14600b57cec5SDimitry Andric addLiveRegs(InRegs);
14610b57cec5SDimitry Andric
14620b57cec5SDimitry Andric // Increase LiveOutRegsNumUsages for blocks
14630b57cec5SDimitry Andric // producing registers consumed in another
14640b57cec5SDimitry Andric // scheduling region.
14650b57cec5SDimitry Andric for (unsigned Reg : DAG->getOutRegs()) {
14660b57cec5SDimitry Andric for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
14670b57cec5SDimitry Andric // Do reverse traversal
14680b57cec5SDimitry Andric int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
14690b57cec5SDimitry Andric SIScheduleBlock *Block = Blocks[ID];
14700b57cec5SDimitry Andric const std::set<unsigned> &OutRegs = Block->getOutRegs();
14710b57cec5SDimitry Andric
14720b57cec5SDimitry Andric if (OutRegs.find(Reg) == OutRegs.end())
14730b57cec5SDimitry Andric continue;
14740b57cec5SDimitry Andric
14750b57cec5SDimitry Andric ++LiveOutRegsNumUsages[ID][Reg];
14760b57cec5SDimitry Andric break;
14770b57cec5SDimitry Andric }
14780b57cec5SDimitry Andric }
14790b57cec5SDimitry Andric
14800b57cec5SDimitry Andric // Fill LiveRegsConsumers for regs that were already
14810b57cec5SDimitry Andric // defined before scheduling.
14820eae32dcSDimitry Andric for (SIScheduleBlock *Block : Blocks) {
14830b57cec5SDimitry Andric for (unsigned Reg : Block->getInRegs()) {
14840b57cec5SDimitry Andric bool Found = false;
14850b57cec5SDimitry Andric for (SIScheduleBlock* Pred: Block->getPreds()) {
14860b57cec5SDimitry Andric std::set<unsigned> PredOutRegs = Pred->getOutRegs();
14870b57cec5SDimitry Andric std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
14880b57cec5SDimitry Andric
14890b57cec5SDimitry Andric if (RegPos != PredOutRegs.end()) {
14900b57cec5SDimitry Andric Found = true;
14910b57cec5SDimitry Andric break;
14920b57cec5SDimitry Andric }
14930b57cec5SDimitry Andric }
14940b57cec5SDimitry Andric
14950b57cec5SDimitry Andric if (!Found)
14960b57cec5SDimitry Andric ++LiveRegsConsumers[Reg];
14970b57cec5SDimitry Andric }
14980b57cec5SDimitry Andric }
14990b57cec5SDimitry Andric
15000b57cec5SDimitry Andric for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
15010b57cec5SDimitry Andric SIScheduleBlock *Block = Blocks[i];
15020b57cec5SDimitry Andric if (BlockNumPredsLeft[i] == 0) {
15030b57cec5SDimitry Andric ReadyBlocks.push_back(Block);
15040b57cec5SDimitry Andric }
15050b57cec5SDimitry Andric }
15060b57cec5SDimitry Andric
15070b57cec5SDimitry Andric while (SIScheduleBlock *Block = pickBlock()) {
15080b57cec5SDimitry Andric BlocksScheduled.push_back(Block);
15090b57cec5SDimitry Andric blockScheduled(Block);
15100b57cec5SDimitry Andric }
15110b57cec5SDimitry Andric
15120b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Block Order:"; for (SIScheduleBlock *Block
15130b57cec5SDimitry Andric : BlocksScheduled) {
15140b57cec5SDimitry Andric dbgs() << ' ' << Block->getID();
15150b57cec5SDimitry Andric } dbgs() << '\n';);
15160b57cec5SDimitry Andric }
15170b57cec5SDimitry Andric
tryCandidateLatency(SIBlockSchedCandidate & Cand,SIBlockSchedCandidate & TryCand)15180b57cec5SDimitry Andric bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
15190b57cec5SDimitry Andric SIBlockSchedCandidate &TryCand) {
15200b57cec5SDimitry Andric if (!Cand.isValid()) {
15210b57cec5SDimitry Andric TryCand.Reason = NodeOrder;
15220b57cec5SDimitry Andric return true;
15230b57cec5SDimitry Andric }
15240b57cec5SDimitry Andric
15250b57cec5SDimitry Andric // Try to hide high latencies.
15260b57cec5SDimitry Andric if (SISched::tryLess(TryCand.LastPosHighLatParentScheduled,
15270b57cec5SDimitry Andric Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
15280b57cec5SDimitry Andric return true;
15290b57cec5SDimitry Andric // Schedule high latencies early so you can hide them better.
15300b57cec5SDimitry Andric if (SISched::tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
15310b57cec5SDimitry Andric TryCand, Cand, Latency))
15320b57cec5SDimitry Andric return true;
15330b57cec5SDimitry Andric if (TryCand.IsHighLatency && SISched::tryGreater(TryCand.Height, Cand.Height,
15340b57cec5SDimitry Andric TryCand, Cand, Depth))
15350b57cec5SDimitry Andric return true;
15360b57cec5SDimitry Andric if (SISched::tryGreater(TryCand.NumHighLatencySuccessors,
15370b57cec5SDimitry Andric Cand.NumHighLatencySuccessors,
15380b57cec5SDimitry Andric TryCand, Cand, Successor))
15390b57cec5SDimitry Andric return true;
15400b57cec5SDimitry Andric return false;
15410b57cec5SDimitry Andric }
15420b57cec5SDimitry Andric
tryCandidateRegUsage(SIBlockSchedCandidate & Cand,SIBlockSchedCandidate & TryCand)15430b57cec5SDimitry Andric bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
15440b57cec5SDimitry Andric SIBlockSchedCandidate &TryCand) {
15450b57cec5SDimitry Andric if (!Cand.isValid()) {
15460b57cec5SDimitry Andric TryCand.Reason = NodeOrder;
15470b57cec5SDimitry Andric return true;
15480b57cec5SDimitry Andric }
15490b57cec5SDimitry Andric
15500b57cec5SDimitry Andric if (SISched::tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
15510b57cec5SDimitry Andric TryCand, Cand, RegUsage))
15520b57cec5SDimitry Andric return true;
15530b57cec5SDimitry Andric if (SISched::tryGreater(TryCand.NumSuccessors > 0,
15540b57cec5SDimitry Andric Cand.NumSuccessors > 0,
15550b57cec5SDimitry Andric TryCand, Cand, Successor))
15560b57cec5SDimitry Andric return true;
15570b57cec5SDimitry Andric if (SISched::tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
15580b57cec5SDimitry Andric return true;
15590b57cec5SDimitry Andric if (SISched::tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
15600b57cec5SDimitry Andric TryCand, Cand, RegUsage))
15610b57cec5SDimitry Andric return true;
15620b57cec5SDimitry Andric return false;
15630b57cec5SDimitry Andric }
15640b57cec5SDimitry Andric
pickBlock()15650b57cec5SDimitry Andric SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
15660b57cec5SDimitry Andric SIBlockSchedCandidate Cand;
15670b57cec5SDimitry Andric std::vector<SIScheduleBlock*>::iterator Best;
15680b57cec5SDimitry Andric SIScheduleBlock *Block;
15690b57cec5SDimitry Andric if (ReadyBlocks.empty())
15700b57cec5SDimitry Andric return nullptr;
15710b57cec5SDimitry Andric
15720b57cec5SDimitry Andric DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
15730b57cec5SDimitry Andric VregCurrentUsage, SregCurrentUsage);
15740b57cec5SDimitry Andric if (VregCurrentUsage > maxVregUsage)
15750b57cec5SDimitry Andric maxVregUsage = VregCurrentUsage;
15760b57cec5SDimitry Andric if (SregCurrentUsage > maxSregUsage)
15770b57cec5SDimitry Andric maxSregUsage = SregCurrentUsage;
15780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Picking New Blocks\n"; dbgs() << "Available: ";
15790b57cec5SDimitry Andric for (SIScheduleBlock *Block
15800b57cec5SDimitry Andric : ReadyBlocks) dbgs()
15810b57cec5SDimitry Andric << Block->getID() << ' ';
15820b57cec5SDimitry Andric dbgs() << "\nCurrent Live:\n";
15830b57cec5SDimitry Andric for (unsigned Reg
15840b57cec5SDimitry Andric : LiveRegs) dbgs()
15850b57cec5SDimitry Andric << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
15860b57cec5SDimitry Andric dbgs() << '\n';
15870b57cec5SDimitry Andric dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';
15880b57cec5SDimitry Andric dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';);
15890b57cec5SDimitry Andric
15900b57cec5SDimitry Andric Cand.Block = nullptr;
15910b57cec5SDimitry Andric for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
15920b57cec5SDimitry Andric E = ReadyBlocks.end(); I != E; ++I) {
15930b57cec5SDimitry Andric SIBlockSchedCandidate TryCand;
15940b57cec5SDimitry Andric TryCand.Block = *I;
15950b57cec5SDimitry Andric TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
15960b57cec5SDimitry Andric TryCand.VGPRUsageDiff =
15970b57cec5SDimitry Andric checkRegUsageImpact(TryCand.Block->getInRegs(),
15985ffd83dbSDimitry Andric TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
15990b57cec5SDimitry Andric TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
16000b57cec5SDimitry Andric TryCand.NumHighLatencySuccessors =
16010b57cec5SDimitry Andric TryCand.Block->getNumHighLatencySuccessors();
16020b57cec5SDimitry Andric TryCand.LastPosHighLatParentScheduled =
16030b57cec5SDimitry Andric (unsigned int) std::max<int> (0,
16040b57cec5SDimitry Andric LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
16050b57cec5SDimitry Andric LastPosWaitedHighLatency);
16060b57cec5SDimitry Andric TryCand.Height = TryCand.Block->Height;
16070b57cec5SDimitry Andric // Try not to increase VGPR usage too much, else we may spill.
16080b57cec5SDimitry Andric if (VregCurrentUsage > 120 ||
16090b57cec5SDimitry Andric Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
16100b57cec5SDimitry Andric if (!tryCandidateRegUsage(Cand, TryCand) &&
16110b57cec5SDimitry Andric Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
16120b57cec5SDimitry Andric tryCandidateLatency(Cand, TryCand);
16130b57cec5SDimitry Andric } else {
16140b57cec5SDimitry Andric if (!tryCandidateLatency(Cand, TryCand))
16150b57cec5SDimitry Andric tryCandidateRegUsage(Cand, TryCand);
16160b57cec5SDimitry Andric }
16170b57cec5SDimitry Andric if (TryCand.Reason != NoCand) {
16180b57cec5SDimitry Andric Cand.setBest(TryCand);
16190b57cec5SDimitry Andric Best = I;
16200b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' '
16210b57cec5SDimitry Andric << getReasonStr(Cand.Reason) << '\n');
16220b57cec5SDimitry Andric }
16230b57cec5SDimitry Andric }
16240b57cec5SDimitry Andric
16250b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n';
16260b57cec5SDimitry Andric dbgs() << "Is a block with high latency instruction: "
16270b57cec5SDimitry Andric << (Cand.IsHighLatency ? "yes\n" : "no\n");
16280b57cec5SDimitry Andric dbgs() << "Position of last high latency dependency: "
16290b57cec5SDimitry Andric << Cand.LastPosHighLatParentScheduled << '\n';
16300b57cec5SDimitry Andric dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';
16310b57cec5SDimitry Andric dbgs() << '\n';);
16320b57cec5SDimitry Andric
16330b57cec5SDimitry Andric Block = Cand.Block;
16340b57cec5SDimitry Andric ReadyBlocks.erase(Best);
16350b57cec5SDimitry Andric return Block;
16360b57cec5SDimitry Andric }
16370b57cec5SDimitry Andric
16380b57cec5SDimitry Andric // Tracking of currently alive registers to determine VGPR Usage.
16390b57cec5SDimitry Andric
addLiveRegs(std::set<unsigned> & Regs)16400b57cec5SDimitry Andric void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1641e8d8bef9SDimitry Andric for (Register Reg : Regs) {
16420b57cec5SDimitry Andric // For now only track virtual registers.
1643e8d8bef9SDimitry Andric if (!Reg.isVirtual())
16440b57cec5SDimitry Andric continue;
16450b57cec5SDimitry Andric // If not already in the live set, then add it.
16460b57cec5SDimitry Andric (void) LiveRegs.insert(Reg);
16470b57cec5SDimitry Andric }
16480b57cec5SDimitry Andric }
16490b57cec5SDimitry Andric
decreaseLiveRegs(SIScheduleBlock * Block,std::set<unsigned> & Regs)16500b57cec5SDimitry Andric void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
16510b57cec5SDimitry Andric std::set<unsigned> &Regs) {
16520b57cec5SDimitry Andric for (unsigned Reg : Regs) {
16530b57cec5SDimitry Andric // For now only track virtual registers.
16540b57cec5SDimitry Andric std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
16550b57cec5SDimitry Andric assert (Pos != LiveRegs.end() && // Reg must be live.
16560b57cec5SDimitry Andric LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
16570b57cec5SDimitry Andric LiveRegsConsumers[Reg] >= 1);
16580b57cec5SDimitry Andric --LiveRegsConsumers[Reg];
16590b57cec5SDimitry Andric if (LiveRegsConsumers[Reg] == 0)
16600b57cec5SDimitry Andric LiveRegs.erase(Pos);
16610b57cec5SDimitry Andric }
16620b57cec5SDimitry Andric }
16630b57cec5SDimitry Andric
releaseBlockSuccs(SIScheduleBlock * Parent)16640b57cec5SDimitry Andric void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
16650b57cec5SDimitry Andric for (const auto &Block : Parent->getSuccs()) {
16660b57cec5SDimitry Andric if (--BlockNumPredsLeft[Block.first->getID()] == 0)
16670b57cec5SDimitry Andric ReadyBlocks.push_back(Block.first);
16680b57cec5SDimitry Andric
16690b57cec5SDimitry Andric if (Parent->isHighLatencyBlock() &&
16700b57cec5SDimitry Andric Block.second == SIScheduleBlockLinkKind::Data)
16710b57cec5SDimitry Andric LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
16720b57cec5SDimitry Andric }
16730b57cec5SDimitry Andric }
16740b57cec5SDimitry Andric
blockScheduled(SIScheduleBlock * Block)16750b57cec5SDimitry Andric void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
16760b57cec5SDimitry Andric decreaseLiveRegs(Block, Block->getInRegs());
16770b57cec5SDimitry Andric addLiveRegs(Block->getOutRegs());
16780b57cec5SDimitry Andric releaseBlockSuccs(Block);
16790eae32dcSDimitry Andric for (const auto &RegP : LiveOutRegsNumUsages[Block->getID()]) {
16800b57cec5SDimitry Andric // We produce this register, thus it must not be previously alive.
16810b57cec5SDimitry Andric assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
16820b57cec5SDimitry Andric LiveRegsConsumers[RegP.first] == 0);
16830b57cec5SDimitry Andric LiveRegsConsumers[RegP.first] += RegP.second;
16840b57cec5SDimitry Andric }
16850b57cec5SDimitry Andric if (LastPosHighLatencyParentScheduled[Block->getID()] >
16860b57cec5SDimitry Andric (unsigned)LastPosWaitedHighLatency)
16870b57cec5SDimitry Andric LastPosWaitedHighLatency =
16880b57cec5SDimitry Andric LastPosHighLatencyParentScheduled[Block->getID()];
16890b57cec5SDimitry Andric ++NumBlockScheduled;
16900b57cec5SDimitry Andric }
16910b57cec5SDimitry Andric
16920b57cec5SDimitry Andric std::vector<int>
checkRegUsageImpact(std::set<unsigned> & InRegs,std::set<unsigned> & OutRegs)16930b57cec5SDimitry Andric SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
16940b57cec5SDimitry Andric std::set<unsigned> &OutRegs) {
16950b57cec5SDimitry Andric std::vector<int> DiffSetPressure;
16960b57cec5SDimitry Andric DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);
16970b57cec5SDimitry Andric
1698e8d8bef9SDimitry Andric for (Register Reg : InRegs) {
16990b57cec5SDimitry Andric // For now only track virtual registers.
1700e8d8bef9SDimitry Andric if (!Reg.isVirtual())
17010b57cec5SDimitry Andric continue;
17020b57cec5SDimitry Andric if (LiveRegsConsumers[Reg] > 1)
17030b57cec5SDimitry Andric continue;
17040b57cec5SDimitry Andric PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
17050b57cec5SDimitry Andric for (; PSetI.isValid(); ++PSetI) {
17060b57cec5SDimitry Andric DiffSetPressure[*PSetI] -= PSetI.getWeight();
17070b57cec5SDimitry Andric }
17080b57cec5SDimitry Andric }
17090b57cec5SDimitry Andric
1710e8d8bef9SDimitry Andric for (Register Reg : OutRegs) {
17110b57cec5SDimitry Andric // For now only track virtual registers.
1712e8d8bef9SDimitry Andric if (!Reg.isVirtual())
17130b57cec5SDimitry Andric continue;
17140b57cec5SDimitry Andric PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
17150b57cec5SDimitry Andric for (; PSetI.isValid(); ++PSetI) {
17160b57cec5SDimitry Andric DiffSetPressure[*PSetI] += PSetI.getWeight();
17170b57cec5SDimitry Andric }
17180b57cec5SDimitry Andric }
17190b57cec5SDimitry Andric
17200b57cec5SDimitry Andric return DiffSetPressure;
17210b57cec5SDimitry Andric }
17220b57cec5SDimitry Andric
17230b57cec5SDimitry Andric // SIScheduler //
17240b57cec5SDimitry Andric
17250b57cec5SDimitry Andric struct SIScheduleBlockResult
scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,SISchedulerBlockSchedulerVariant ScheduleVariant)17260b57cec5SDimitry Andric SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
17270b57cec5SDimitry Andric SISchedulerBlockSchedulerVariant ScheduleVariant) {
17280b57cec5SDimitry Andric SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
17290b57cec5SDimitry Andric SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
17300b57cec5SDimitry Andric std::vector<SIScheduleBlock*> ScheduledBlocks;
17310b57cec5SDimitry Andric struct SIScheduleBlockResult Res;
17320b57cec5SDimitry Andric
17330b57cec5SDimitry Andric ScheduledBlocks = Scheduler.getBlocks();
17340b57cec5SDimitry Andric
17350eae32dcSDimitry Andric for (SIScheduleBlock *Block : ScheduledBlocks) {
17360b57cec5SDimitry Andric std::vector<SUnit*> SUs = Block->getScheduledUnits();
17370b57cec5SDimitry Andric
17380b57cec5SDimitry Andric for (SUnit* SU : SUs)
17390b57cec5SDimitry Andric Res.SUs.push_back(SU->NodeNum);
17400b57cec5SDimitry Andric }
17410b57cec5SDimitry Andric
17420b57cec5SDimitry Andric Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
17430b57cec5SDimitry Andric Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
17440b57cec5SDimitry Andric return Res;
17450b57cec5SDimitry Andric }
17460b57cec5SDimitry Andric
17470b57cec5SDimitry Andric // SIScheduleDAGMI //
17480b57cec5SDimitry Andric
SIScheduleDAGMI(MachineSchedContext * C)17490b57cec5SDimitry Andric SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
17508bcb0991SDimitry Andric ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) {
17510b57cec5SDimitry Andric SITII = static_cast<const SIInstrInfo*>(TII);
17520b57cec5SDimitry Andric SITRI = static_cast<const SIRegisterInfo*>(TRI);
17530b57cec5SDimitry Andric }
17540b57cec5SDimitry Andric
17550b57cec5SDimitry Andric SIScheduleDAGMI::~SIScheduleDAGMI() = default;
17560b57cec5SDimitry Andric
17570b57cec5SDimitry Andric // Code adapted from scheduleDAG.cpp
17580b57cec5SDimitry Andric // Does a topological sort over the SUs.
17590b57cec5SDimitry Andric // Both TopDown and BottomUp
topologicalSort()17600b57cec5SDimitry Andric void SIScheduleDAGMI::topologicalSort() {
17610b57cec5SDimitry Andric Topo.InitDAGTopologicalSorting();
17620b57cec5SDimitry Andric
17630b57cec5SDimitry Andric TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
17640b57cec5SDimitry Andric BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
17650b57cec5SDimitry Andric }
17660b57cec5SDimitry Andric
17670b57cec5SDimitry Andric // Move low latencies further from their user without
17680b57cec5SDimitry Andric // increasing SGPR usage (in general)
17690b57cec5SDimitry Andric // This is to be replaced by a better pass that would
17700b57cec5SDimitry Andric // take into account SGPR usage (based on VGPR Usage
17710b57cec5SDimitry Andric // and the corresponding wavefront count), that would
17720b57cec5SDimitry Andric // try to merge groups of loads if it make sense, etc
moveLowLatencies()17730b57cec5SDimitry Andric void SIScheduleDAGMI::moveLowLatencies() {
17740b57cec5SDimitry Andric unsigned DAGSize = SUnits.size();
17750b57cec5SDimitry Andric int LastLowLatencyUser = -1;
17760b57cec5SDimitry Andric int LastLowLatencyPos = -1;
17770b57cec5SDimitry Andric
17780b57cec5SDimitry Andric for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
17790b57cec5SDimitry Andric SUnit *SU = &SUnits[ScheduledSUnits[i]];
17800b57cec5SDimitry Andric bool IsLowLatencyUser = false;
17810b57cec5SDimitry Andric unsigned MinPos = 0;
17820b57cec5SDimitry Andric
17830b57cec5SDimitry Andric for (SDep& PredDep : SU->Preds) {
17840b57cec5SDimitry Andric SUnit *Pred = PredDep.getSUnit();
17850b57cec5SDimitry Andric if (SITII->isLowLatencyInstruction(*Pred->getInstr())) {
17860b57cec5SDimitry Andric IsLowLatencyUser = true;
17870b57cec5SDimitry Andric }
17880b57cec5SDimitry Andric if (Pred->NodeNum >= DAGSize)
17890b57cec5SDimitry Andric continue;
17900b57cec5SDimitry Andric unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
17910b57cec5SDimitry Andric if (PredPos >= MinPos)
17920b57cec5SDimitry Andric MinPos = PredPos + 1;
17930b57cec5SDimitry Andric }
17940b57cec5SDimitry Andric
17950b57cec5SDimitry Andric if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
17960b57cec5SDimitry Andric unsigned BestPos = LastLowLatencyUser + 1;
17970b57cec5SDimitry Andric if ((int)BestPos <= LastLowLatencyPos)
17980b57cec5SDimitry Andric BestPos = LastLowLatencyPos + 1;
17990b57cec5SDimitry Andric if (BestPos < MinPos)
18000b57cec5SDimitry Andric BestPos = MinPos;
18010b57cec5SDimitry Andric if (BestPos < i) {
18020b57cec5SDimitry Andric for (unsigned u = i; u > BestPos; --u) {
18030b57cec5SDimitry Andric ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
18040b57cec5SDimitry Andric ScheduledSUnits[u] = ScheduledSUnits[u-1];
18050b57cec5SDimitry Andric }
18060b57cec5SDimitry Andric ScheduledSUnits[BestPos] = SU->NodeNum;
18070b57cec5SDimitry Andric ScheduledSUnitsInv[SU->NodeNum] = BestPos;
18080b57cec5SDimitry Andric }
18090b57cec5SDimitry Andric LastLowLatencyPos = BestPos;
18100b57cec5SDimitry Andric if (IsLowLatencyUser)
18110b57cec5SDimitry Andric LastLowLatencyUser = BestPos;
18120b57cec5SDimitry Andric } else if (IsLowLatencyUser) {
18130b57cec5SDimitry Andric LastLowLatencyUser = i;
18140b57cec5SDimitry Andric // Moves COPY instructions on which depends
18150b57cec5SDimitry Andric // the low latency instructions too.
18160b57cec5SDimitry Andric } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
18170b57cec5SDimitry Andric bool CopyForLowLat = false;
18180b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) {
18190b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit();
18200b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
18210b57cec5SDimitry Andric continue;
18220b57cec5SDimitry Andric if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
18230b57cec5SDimitry Andric CopyForLowLat = true;
18240b57cec5SDimitry Andric }
18250b57cec5SDimitry Andric }
18260b57cec5SDimitry Andric if (!CopyForLowLat)
18270b57cec5SDimitry Andric continue;
18280b57cec5SDimitry Andric if (MinPos < i) {
18290b57cec5SDimitry Andric for (unsigned u = i; u > MinPos; --u) {
18300b57cec5SDimitry Andric ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
18310b57cec5SDimitry Andric ScheduledSUnits[u] = ScheduledSUnits[u-1];
18320b57cec5SDimitry Andric }
18330b57cec5SDimitry Andric ScheduledSUnits[MinPos] = SU->NodeNum;
18340b57cec5SDimitry Andric ScheduledSUnitsInv[SU->NodeNum] = MinPos;
18350b57cec5SDimitry Andric }
18360b57cec5SDimitry Andric }
18370b57cec5SDimitry Andric }
18380b57cec5SDimitry Andric }
18390b57cec5SDimitry Andric
restoreSULinksLeft()18400b57cec5SDimitry Andric void SIScheduleDAGMI::restoreSULinksLeft() {
18410b57cec5SDimitry Andric for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
18420b57cec5SDimitry Andric SUnits[i].isScheduled = false;
18430b57cec5SDimitry Andric SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
18440b57cec5SDimitry Andric SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
18450b57cec5SDimitry Andric SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
18460b57cec5SDimitry Andric SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
18470b57cec5SDimitry Andric }
18480b57cec5SDimitry Andric }
18490b57cec5SDimitry Andric
18500b57cec5SDimitry Andric // Return the Vgpr and Sgpr usage corresponding to some virtual registers.
18510b57cec5SDimitry Andric template<typename _Iterator> void
fillVgprSgprCost(_Iterator First,_Iterator End,unsigned & VgprUsage,unsigned & SgprUsage)18520b57cec5SDimitry Andric SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
18530b57cec5SDimitry Andric unsigned &VgprUsage, unsigned &SgprUsage) {
18540b57cec5SDimitry Andric VgprUsage = 0;
18550b57cec5SDimitry Andric SgprUsage = 0;
18560b57cec5SDimitry Andric for (_Iterator RegI = First; RegI != End; ++RegI) {
1857e8d8bef9SDimitry Andric Register Reg = *RegI;
18580b57cec5SDimitry Andric // For now only track virtual registers
1859e8d8bef9SDimitry Andric if (!Reg.isVirtual())
18600b57cec5SDimitry Andric continue;
18610b57cec5SDimitry Andric PSetIterator PSetI = MRI.getPressureSets(Reg);
18620b57cec5SDimitry Andric for (; PSetI.isValid(); ++PSetI) {
18635ffd83dbSDimitry Andric if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
18640b57cec5SDimitry Andric VgprUsage += PSetI.getWeight();
18655ffd83dbSDimitry Andric else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
18660b57cec5SDimitry Andric SgprUsage += PSetI.getWeight();
18670b57cec5SDimitry Andric }
18680b57cec5SDimitry Andric }
18690b57cec5SDimitry Andric }
18700b57cec5SDimitry Andric
schedule()18710b57cec5SDimitry Andric void SIScheduleDAGMI::schedule()
18720b57cec5SDimitry Andric {
18730b57cec5SDimitry Andric SmallVector<SUnit*, 8> TopRoots, BotRoots;
18740b57cec5SDimitry Andric SIScheduleBlockResult Best, Temp;
18750b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Preparing Scheduling\n");
18760b57cec5SDimitry Andric
18770b57cec5SDimitry Andric buildDAGWithRegPressure();
187806c3fb27SDimitry Andric postProcessDAG();
1879753f127fSDimitry Andric
18800b57cec5SDimitry Andric LLVM_DEBUG(dump());
1881753f127fSDimitry Andric if (PrintDAGs)
1882753f127fSDimitry Andric dump();
1883753f127fSDimitry Andric if (ViewMISchedDAGs)
1884753f127fSDimitry Andric viewGraph();
18850b57cec5SDimitry Andric
18860b57cec5SDimitry Andric topologicalSort();
18870b57cec5SDimitry Andric findRootsAndBiasEdges(TopRoots, BotRoots);
18880b57cec5SDimitry Andric // We reuse several ScheduleDAGMI and ScheduleDAGMILive
18890b57cec5SDimitry Andric // functions, but to make them happy we must initialize
18900b57cec5SDimitry Andric // the default Scheduler implementation (even if we do not
18910b57cec5SDimitry Andric // run it)
18920b57cec5SDimitry Andric SchedImpl->initialize(this);
18930b57cec5SDimitry Andric initQueues(TopRoots, BotRoots);
18940b57cec5SDimitry Andric
18950b57cec5SDimitry Andric // Fill some stats to help scheduling.
18960b57cec5SDimitry Andric
18970b57cec5SDimitry Andric SUnitsLinksBackup = SUnits;
18980b57cec5SDimitry Andric IsLowLatencySU.clear();
18990b57cec5SDimitry Andric LowLatencyOffset.clear();
19000b57cec5SDimitry Andric IsHighLatencySU.clear();
19010b57cec5SDimitry Andric
19020b57cec5SDimitry Andric IsLowLatencySU.resize(SUnits.size(), 0);
19030b57cec5SDimitry Andric LowLatencyOffset.resize(SUnits.size(), 0);
19040b57cec5SDimitry Andric IsHighLatencySU.resize(SUnits.size(), 0);
19050b57cec5SDimitry Andric
19060b57cec5SDimitry Andric for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
19070b57cec5SDimitry Andric SUnit *SU = &SUnits[i];
19080b57cec5SDimitry Andric const MachineOperand *BaseLatOp;
19090b57cec5SDimitry Andric int64_t OffLatReg;
19100b57cec5SDimitry Andric if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
19110b57cec5SDimitry Andric IsLowLatencySU[i] = 1;
19125ffd83dbSDimitry Andric bool OffsetIsScalable;
19130b57cec5SDimitry Andric if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
19145ffd83dbSDimitry Andric OffsetIsScalable, TRI))
19150b57cec5SDimitry Andric LowLatencyOffset[i] = OffLatReg;
19165ffd83dbSDimitry Andric } else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode()))
19170b57cec5SDimitry Andric IsHighLatencySU[i] = 1;
19180b57cec5SDimitry Andric }
19190b57cec5SDimitry Andric
19200b57cec5SDimitry Andric SIScheduler Scheduler(this);
19210b57cec5SDimitry Andric Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
19220b57cec5SDimitry Andric SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
19230b57cec5SDimitry Andric
19240b57cec5SDimitry Andric // if VGPR usage is extremely high, try other good performing variants
19250b57cec5SDimitry Andric // which could lead to lower VGPR usage
19260b57cec5SDimitry Andric if (Best.MaxVGPRUsage > 180) {
19270b57cec5SDimitry Andric static const std::pair<SISchedulerBlockCreatorVariant,
19280b57cec5SDimitry Andric SISchedulerBlockSchedulerVariant>
19290b57cec5SDimitry Andric Variants[] = {
19300b57cec5SDimitry Andric { LatenciesAlone, BlockRegUsageLatency },
19310b57cec5SDimitry Andric // { LatenciesAlone, BlockRegUsage },
19320b57cec5SDimitry Andric { LatenciesGrouped, BlockLatencyRegUsage },
19330b57cec5SDimitry Andric // { LatenciesGrouped, BlockRegUsageLatency },
19340b57cec5SDimitry Andric // { LatenciesGrouped, BlockRegUsage },
19350b57cec5SDimitry Andric { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
19360b57cec5SDimitry Andric // { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
19370b57cec5SDimitry Andric // { LatenciesAlonePlusConsecutive, BlockRegUsage }
19380b57cec5SDimitry Andric };
19390b57cec5SDimitry Andric for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
19400b57cec5SDimitry Andric Temp = Scheduler.scheduleVariant(v.first, v.second);
19410b57cec5SDimitry Andric if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
19420b57cec5SDimitry Andric Best = Temp;
19430b57cec5SDimitry Andric }
19440b57cec5SDimitry Andric }
19450b57cec5SDimitry Andric // if VGPR usage is still extremely high, we may spill. Try other variants
19460b57cec5SDimitry Andric // which are less performing, but that could lead to lower VGPR usage.
19470b57cec5SDimitry Andric if (Best.MaxVGPRUsage > 200) {
19480b57cec5SDimitry Andric static const std::pair<SISchedulerBlockCreatorVariant,
19490b57cec5SDimitry Andric SISchedulerBlockSchedulerVariant>
19500b57cec5SDimitry Andric Variants[] = {
19510b57cec5SDimitry Andric // { LatenciesAlone, BlockRegUsageLatency },
19520b57cec5SDimitry Andric { LatenciesAlone, BlockRegUsage },
19530b57cec5SDimitry Andric // { LatenciesGrouped, BlockLatencyRegUsage },
19540b57cec5SDimitry Andric { LatenciesGrouped, BlockRegUsageLatency },
19550b57cec5SDimitry Andric { LatenciesGrouped, BlockRegUsage },
19560b57cec5SDimitry Andric // { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
19570b57cec5SDimitry Andric { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
19580b57cec5SDimitry Andric { LatenciesAlonePlusConsecutive, BlockRegUsage }
19590b57cec5SDimitry Andric };
19600b57cec5SDimitry Andric for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
19610b57cec5SDimitry Andric Temp = Scheduler.scheduleVariant(v.first, v.second);
19620b57cec5SDimitry Andric if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
19630b57cec5SDimitry Andric Best = Temp;
19640b57cec5SDimitry Andric }
19650b57cec5SDimitry Andric }
19660b57cec5SDimitry Andric
19670b57cec5SDimitry Andric ScheduledSUnits = Best.SUs;
19680b57cec5SDimitry Andric ScheduledSUnitsInv.resize(SUnits.size());
19690b57cec5SDimitry Andric
19700b57cec5SDimitry Andric for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
19710b57cec5SDimitry Andric ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
19720b57cec5SDimitry Andric }
19730b57cec5SDimitry Andric
19740b57cec5SDimitry Andric moveLowLatencies();
19750b57cec5SDimitry Andric
19760b57cec5SDimitry Andric // Tell the outside world about the result of the scheduling.
19770b57cec5SDimitry Andric
19780b57cec5SDimitry Andric assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
19790b57cec5SDimitry Andric TopRPTracker.setPos(CurrentTop);
19800b57cec5SDimitry Andric
19810eae32dcSDimitry Andric for (unsigned I : ScheduledSUnits) {
19820eae32dcSDimitry Andric SUnit *SU = &SUnits[I];
19830b57cec5SDimitry Andric
19840b57cec5SDimitry Andric scheduleMI(SU, true);
19850b57cec5SDimitry Andric
19860b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
19870b57cec5SDimitry Andric << *SU->getInstr());
19880b57cec5SDimitry Andric }
19890b57cec5SDimitry Andric
19900b57cec5SDimitry Andric assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
19910b57cec5SDimitry Andric
19920b57cec5SDimitry Andric placeDebugValues();
19930b57cec5SDimitry Andric
19940b57cec5SDimitry Andric LLVM_DEBUG({
19950b57cec5SDimitry Andric dbgs() << "*** Final schedule for "
19960b57cec5SDimitry Andric << printMBBReference(*begin()->getParent()) << " ***\n";
19970b57cec5SDimitry Andric dumpSchedule();
19980b57cec5SDimitry Andric dbgs() << '\n';
19990b57cec5SDimitry Andric });
20000b57cec5SDimitry Andric }
2001