xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// SI Machine Scheduler interface
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "SIMachineScheduler.h"
150b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
16*0fca6ea1SDimitry Andric #include "SIInstrInfo.h"
170b57cec5SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric using namespace llvm;
210b57cec5SDimitry Andric 
220b57cec5SDimitry Andric #define DEBUG_TYPE "machine-scheduler"
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric // This scheduler implements a different scheduling algorithm than
250b57cec5SDimitry Andric // GenericScheduler.
260b57cec5SDimitry Andric //
270b57cec5SDimitry Andric // There are several specific architecture behaviours that can't be modelled
280b57cec5SDimitry Andric // for GenericScheduler:
290b57cec5SDimitry Andric // . When accessing the result of an SGPR load instruction, you have to wait
300b57cec5SDimitry Andric // for all the SGPR load instructions before your current instruction to
310b57cec5SDimitry Andric // have finished.
320b57cec5SDimitry Andric // . When accessing the result of an VGPR load instruction, you have to wait
330b57cec5SDimitry Andric // for all the VGPR load instructions previous to the VGPR load instruction
340b57cec5SDimitry Andric // you are interested in to finish.
350b57cec5SDimitry Andric // . The less the register pressure, the best load latencies are hidden
360b57cec5SDimitry Andric //
370b57cec5SDimitry Andric // Moreover some specifities (like the fact a lot of instructions in the shader
380b57cec5SDimitry Andric // have few dependencies) makes the generic scheduler have some unpredictable
390b57cec5SDimitry Andric // behaviours. For example when register pressure becomes high, it can either
400b57cec5SDimitry Andric // manage to prevent register pressure from going too high, or it can
410b57cec5SDimitry Andric // increase register pressure even more than if it hadn't taken register
420b57cec5SDimitry Andric // pressure into account.
430b57cec5SDimitry Andric //
440b57cec5SDimitry Andric // Also some other bad behaviours are generated, like loading at the beginning
450b57cec5SDimitry Andric // of the shader a constant in VGPR you won't need until the end of the shader.
460b57cec5SDimitry Andric //
470b57cec5SDimitry Andric // The scheduling problem for SI can distinguish three main parts:
480b57cec5SDimitry Andric // . Hiding high latencies (texture sampling, etc)
490b57cec5SDimitry Andric // . Hiding low latencies (SGPR constant loading, etc)
500b57cec5SDimitry Andric // . Keeping register usage low for better latency hiding and general
510b57cec5SDimitry Andric //   performance
520b57cec5SDimitry Andric //
530b57cec5SDimitry Andric // Some other things can also affect performance, but are hard to predict
540b57cec5SDimitry Andric // (cache usage, the fact the HW can issue several instructions from different
550b57cec5SDimitry Andric // wavefronts if different types, etc)
560b57cec5SDimitry Andric //
570b57cec5SDimitry Andric // This scheduler tries to solve the scheduling problem by dividing it into
580b57cec5SDimitry Andric // simpler sub-problems. It divides the instructions into blocks, schedules
590b57cec5SDimitry Andric // locally inside the blocks where it takes care of low latencies, and then
600b57cec5SDimitry Andric // chooses the order of the blocks by taking care of high latencies.
610b57cec5SDimitry Andric // Dividing the instructions into blocks helps control keeping register
620b57cec5SDimitry Andric // usage low.
630b57cec5SDimitry Andric //
640b57cec5SDimitry Andric // First the instructions are put into blocks.
650b57cec5SDimitry Andric //   We want the blocks help control register usage and hide high latencies
660b57cec5SDimitry Andric //   later. To help control register usage, we typically want all local
6781ad6265SDimitry Andric //   computations, when for example you create a result that can be consumed
680b57cec5SDimitry Andric //   right away, to be contained in a block. Block inputs and outputs would
690b57cec5SDimitry Andric //   typically be important results that are needed in several locations of
700b57cec5SDimitry Andric //   the shader. Since we do want blocks to help hide high latencies, we want
710b57cec5SDimitry Andric //   the instructions inside the block to have a minimal set of dependencies
720b57cec5SDimitry Andric //   on high latencies. It will make it easy to pick blocks to hide specific
730b57cec5SDimitry Andric //   high latencies.
740b57cec5SDimitry Andric //   The block creation algorithm is divided into several steps, and several
750b57cec5SDimitry Andric //   variants can be tried during the scheduling process.
760b57cec5SDimitry Andric //
770b57cec5SDimitry Andric // Second the order of the instructions inside the blocks is chosen.
780b57cec5SDimitry Andric //   At that step we do take into account only register usage and hiding
790b57cec5SDimitry Andric //   low latency instructions
800b57cec5SDimitry Andric //
810b57cec5SDimitry Andric // Third the block order is chosen, there we try to hide high latencies
820b57cec5SDimitry Andric // and keep register usage low.
830b57cec5SDimitry Andric //
840b57cec5SDimitry Andric // After the third step, a pass is done to improve the hiding of low
850b57cec5SDimitry Andric // latencies.
860b57cec5SDimitry Andric //
870b57cec5SDimitry Andric // Actually when talking about 'low latency' or 'high latency' it includes
880b57cec5SDimitry Andric // both the latency to get the cache (or global mem) data go to the register,
890b57cec5SDimitry Andric // and the bandwidth limitations.
900b57cec5SDimitry Andric // Increasing the number of active wavefronts helps hide the former, but it
910b57cec5SDimitry Andric // doesn't solve the latter, thus why even if wavefront count is high, we have
920b57cec5SDimitry Andric // to try have as many instructions hiding high latencies as possible.
9381ad6265SDimitry Andric // The OpenCL doc says for example latency of 400 cycles for a global mem
9481ad6265SDimitry Andric // access, which is hidden by 10 instructions if the wavefront count is 10.
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric // Some figures taken from AMD docs:
970b57cec5SDimitry Andric // Both texture and constant L1 caches are 4-way associative with 64 bytes
980b57cec5SDimitry Andric // lines.
990b57cec5SDimitry Andric // Constant cache is shared with 4 CUs.
1000b57cec5SDimitry Andric // For texture sampling, the address generation unit receives 4 texture
1010b57cec5SDimitry Andric // addresses per cycle, thus we could expect texture sampling latency to be
1020b57cec5SDimitry Andric // equivalent to 4 instructions in the very best case (a VGPR is 64 work items,
1030b57cec5SDimitry Andric // instructions in a wavefront group are executed every 4 cycles),
1040b57cec5SDimitry Andric // or 16 instructions if the other wavefronts associated to the 3 other VALUs
1050b57cec5SDimitry Andric // of the CU do texture sampling too. (Don't take these figures too seriously,
1060b57cec5SDimitry Andric // as I'm not 100% sure of the computation)
1070b57cec5SDimitry Andric // Data exports should get similar latency.
1080b57cec5SDimitry Andric // For constant loading, the cache is shader with 4 CUs.
1090b57cec5SDimitry Andric // The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
1100b57cec5SDimitry Andric // I guess if the other CU don't read the cache, it can go up to 64B/cycle.
1110b57cec5SDimitry Andric // It means a simple s_buffer_load should take one instruction to hide, as
1120b57cec5SDimitry Andric // well as a s_buffer_loadx2 and potentially a s_buffer_loadx8 if on the same
1130b57cec5SDimitry Andric // cache line.
1140b57cec5SDimitry Andric //
1150b57cec5SDimitry Andric // As of today the driver doesn't preload the constants in cache, thus the
1160b57cec5SDimitry Andric // first loads get extra latency. The doc says global memory access can be
1170b57cec5SDimitry Andric // 300-600 cycles. We do not specially take that into account when scheduling
1180b57cec5SDimitry Andric // As we expect the driver to be able to preload the constants soon.
1190b57cec5SDimitry Andric 
1200b57cec5SDimitry Andric // common code //
1210b57cec5SDimitry Andric 
1220b57cec5SDimitry Andric #ifndef NDEBUG
1230b57cec5SDimitry Andric 
getReasonStr(SIScheduleCandReason Reason)1240b57cec5SDimitry Andric static const char *getReasonStr(SIScheduleCandReason Reason) {
1250b57cec5SDimitry Andric   switch (Reason) {
1260b57cec5SDimitry Andric   case NoCand:         return "NOCAND";
1270b57cec5SDimitry Andric   case RegUsage:       return "REGUSAGE";
1280b57cec5SDimitry Andric   case Latency:        return "LATENCY";
1290b57cec5SDimitry Andric   case Successor:      return "SUCCESSOR";
1300b57cec5SDimitry Andric   case Depth:          return "DEPTH";
1310b57cec5SDimitry Andric   case NodeOrder:      return "ORDER";
1320b57cec5SDimitry Andric   }
1330b57cec5SDimitry Andric   llvm_unreachable("Unknown reason!");
1340b57cec5SDimitry Andric }
1350b57cec5SDimitry Andric 
1360b57cec5SDimitry Andric #endif
1370b57cec5SDimitry Andric 
138*0fca6ea1SDimitry Andric namespace llvm::SISched {
tryLess(int TryVal,int CandVal,SISchedulerCandidate & TryCand,SISchedulerCandidate & Cand,SIScheduleCandReason Reason)1390b57cec5SDimitry Andric static bool tryLess(int TryVal, int CandVal,
1400b57cec5SDimitry Andric                     SISchedulerCandidate &TryCand,
1410b57cec5SDimitry Andric                     SISchedulerCandidate &Cand,
1420b57cec5SDimitry Andric                     SIScheduleCandReason Reason) {
1430b57cec5SDimitry Andric   if (TryVal < CandVal) {
1440b57cec5SDimitry Andric     TryCand.Reason = Reason;
1450b57cec5SDimitry Andric     return true;
1460b57cec5SDimitry Andric   }
1470b57cec5SDimitry Andric   if (TryVal > CandVal) {
1480b57cec5SDimitry Andric     if (Cand.Reason > Reason)
1490b57cec5SDimitry Andric       Cand.Reason = Reason;
1500b57cec5SDimitry Andric     return true;
1510b57cec5SDimitry Andric   }
1520b57cec5SDimitry Andric   Cand.setRepeat(Reason);
1530b57cec5SDimitry Andric   return false;
1540b57cec5SDimitry Andric }
1550b57cec5SDimitry Andric 
tryGreater(int TryVal,int CandVal,SISchedulerCandidate & TryCand,SISchedulerCandidate & Cand,SIScheduleCandReason Reason)1560b57cec5SDimitry Andric static bool tryGreater(int TryVal, int CandVal,
1570b57cec5SDimitry Andric                        SISchedulerCandidate &TryCand,
1580b57cec5SDimitry Andric                        SISchedulerCandidate &Cand,
1590b57cec5SDimitry Andric                        SIScheduleCandReason Reason) {
1600b57cec5SDimitry Andric   if (TryVal > CandVal) {
1610b57cec5SDimitry Andric     TryCand.Reason = Reason;
1620b57cec5SDimitry Andric     return true;
1630b57cec5SDimitry Andric   }
1640b57cec5SDimitry Andric   if (TryVal < CandVal) {
1650b57cec5SDimitry Andric     if (Cand.Reason > Reason)
1660b57cec5SDimitry Andric       Cand.Reason = Reason;
1670b57cec5SDimitry Andric     return true;
1680b57cec5SDimitry Andric   }
1690b57cec5SDimitry Andric   Cand.setRepeat(Reason);
1700b57cec5SDimitry Andric   return false;
1710b57cec5SDimitry Andric }
172*0fca6ea1SDimitry Andric } // end namespace llvm::SISched
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric // SIScheduleBlock //
1750b57cec5SDimitry Andric 
addUnit(SUnit * SU)1760b57cec5SDimitry Andric void SIScheduleBlock::addUnit(SUnit *SU) {
1770b57cec5SDimitry Andric   NodeNum2Index[SU->NodeNum] = SUnits.size();
1780b57cec5SDimitry Andric   SUnits.push_back(SU);
1790b57cec5SDimitry Andric }
1800b57cec5SDimitry Andric 
1810b57cec5SDimitry Andric #ifndef NDEBUG
traceCandidate(const SISchedCandidate & Cand)1820b57cec5SDimitry Andric void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
1830b57cec5SDimitry Andric 
1840b57cec5SDimitry Andric   dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
1850b57cec5SDimitry Andric   dbgs() << '\n';
1860b57cec5SDimitry Andric }
1870b57cec5SDimitry Andric #endif
1880b57cec5SDimitry Andric 
tryCandidateTopDown(SISchedCandidate & Cand,SISchedCandidate & TryCand)1890b57cec5SDimitry Andric void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
1900b57cec5SDimitry Andric                                           SISchedCandidate &TryCand) {
1910b57cec5SDimitry Andric   // Initialize the candidate if needed.
1920b57cec5SDimitry Andric   if (!Cand.isValid()) {
1930b57cec5SDimitry Andric     TryCand.Reason = NodeOrder;
1940b57cec5SDimitry Andric     return;
1950b57cec5SDimitry Andric   }
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric   if (Cand.SGPRUsage > 60 &&
1980b57cec5SDimitry Andric       SISched::tryLess(TryCand.SGPRUsage, Cand.SGPRUsage,
1990b57cec5SDimitry Andric                        TryCand, Cand, RegUsage))
2000b57cec5SDimitry Andric     return;
2010b57cec5SDimitry Andric 
2020b57cec5SDimitry Andric   // Schedule low latency instructions as top as possible.
2030b57cec5SDimitry Andric   // Order of priority is:
2040b57cec5SDimitry Andric   // . Low latency instructions which do not depend on other low latency
2050b57cec5SDimitry Andric   //   instructions we haven't waited for
2060b57cec5SDimitry Andric   // . Other instructions which do not depend on low latency instructions
2070b57cec5SDimitry Andric   //   we haven't waited for
2080b57cec5SDimitry Andric   // . Low latencies
2090b57cec5SDimitry Andric   // . All other instructions
2100b57cec5SDimitry Andric   // Goal is to get: low latency instructions - independent instructions
2110b57cec5SDimitry Andric   //     - (eventually some more low latency instructions)
2120b57cec5SDimitry Andric   //     - instructions that depend on the first low latency instructions.
2130b57cec5SDimitry Andric   // If in the block there is a lot of constant loads, the SGPR usage
2140b57cec5SDimitry Andric   // could go quite high, thus above the arbitrary limit of 60 will encourage
2150b57cec5SDimitry Andric   // use the already loaded constants (in order to release some SGPRs) before
2160b57cec5SDimitry Andric   // loading more.
2170b57cec5SDimitry Andric   if (SISched::tryLess(TryCand.HasLowLatencyNonWaitedParent,
2180b57cec5SDimitry Andric                        Cand.HasLowLatencyNonWaitedParent,
2190b57cec5SDimitry Andric                        TryCand, Cand, SIScheduleCandReason::Depth))
2200b57cec5SDimitry Andric     return;
2210b57cec5SDimitry Andric 
2220b57cec5SDimitry Andric   if (SISched::tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
2230b57cec5SDimitry Andric                           TryCand, Cand, SIScheduleCandReason::Depth))
2240b57cec5SDimitry Andric     return;
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric   if (TryCand.IsLowLatency &&
2270b57cec5SDimitry Andric       SISched::tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
2280b57cec5SDimitry Andric                        TryCand, Cand, SIScheduleCandReason::Depth))
2290b57cec5SDimitry Andric     return;
2300b57cec5SDimitry Andric 
2310b57cec5SDimitry Andric   if (SISched::tryLess(TryCand.VGPRUsage, Cand.VGPRUsage,
2320b57cec5SDimitry Andric                        TryCand, Cand, RegUsage))
2330b57cec5SDimitry Andric     return;
2340b57cec5SDimitry Andric 
2350b57cec5SDimitry Andric   // Fall through to original instruction order.
2360b57cec5SDimitry Andric   if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
2370b57cec5SDimitry Andric     TryCand.Reason = NodeOrder;
2380b57cec5SDimitry Andric   }
2390b57cec5SDimitry Andric }
2400b57cec5SDimitry Andric 
pickNode()2410b57cec5SDimitry Andric SUnit* SIScheduleBlock::pickNode() {
2420b57cec5SDimitry Andric   SISchedCandidate TopCand;
2430b57cec5SDimitry Andric 
2440b57cec5SDimitry Andric   for (SUnit* SU : TopReadySUs) {
2450b57cec5SDimitry Andric     SISchedCandidate TryCand;
2460b57cec5SDimitry Andric     std::vector<unsigned> pressure;
2470b57cec5SDimitry Andric     std::vector<unsigned> MaxPressure;
2480b57cec5SDimitry Andric     // Predict register usage after this instruction.
2490b57cec5SDimitry Andric     TryCand.SU = SU;
2500b57cec5SDimitry Andric     TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
2515ffd83dbSDimitry Andric     TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
2525ffd83dbSDimitry Andric     TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
2530b57cec5SDimitry Andric     TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
2540b57cec5SDimitry Andric     TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
2550b57cec5SDimitry Andric     TryCand.HasLowLatencyNonWaitedParent =
2560b57cec5SDimitry Andric       HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
2570b57cec5SDimitry Andric     tryCandidateTopDown(TopCand, TryCand);
2580b57cec5SDimitry Andric     if (TryCand.Reason != NoCand)
2590b57cec5SDimitry Andric       TopCand.setBest(TryCand);
2600b57cec5SDimitry Andric   }
2610b57cec5SDimitry Andric 
2620b57cec5SDimitry Andric   return TopCand.SU;
2630b57cec5SDimitry Andric }
2640b57cec5SDimitry Andric 
2650b57cec5SDimitry Andric 
2660b57cec5SDimitry Andric // Schedule something valid.
fastSchedule()2670b57cec5SDimitry Andric void SIScheduleBlock::fastSchedule() {
2680b57cec5SDimitry Andric   TopReadySUs.clear();
2690b57cec5SDimitry Andric   if (Scheduled)
2700b57cec5SDimitry Andric     undoSchedule();
2710b57cec5SDimitry Andric 
2720b57cec5SDimitry Andric   for (SUnit* SU : SUnits) {
2730b57cec5SDimitry Andric     if (!SU->NumPredsLeft)
2740b57cec5SDimitry Andric       TopReadySUs.push_back(SU);
2750b57cec5SDimitry Andric   }
2760b57cec5SDimitry Andric 
2770b57cec5SDimitry Andric   while (!TopReadySUs.empty()) {
2780b57cec5SDimitry Andric     SUnit *SU = TopReadySUs[0];
2790b57cec5SDimitry Andric     ScheduledSUnits.push_back(SU);
2800b57cec5SDimitry Andric     nodeScheduled(SU);
2810b57cec5SDimitry Andric   }
2820b57cec5SDimitry Andric 
2830b57cec5SDimitry Andric   Scheduled = true;
2840b57cec5SDimitry Andric }
2850b57cec5SDimitry Andric 
2860b57cec5SDimitry Andric // Returns if the register was set between first and last.
isDefBetween(unsigned Reg,SlotIndex First,SlotIndex Last,const MachineRegisterInfo * MRI,const LiveIntervals * LIS)2870b57cec5SDimitry Andric static bool isDefBetween(unsigned Reg,
2880b57cec5SDimitry Andric                            SlotIndex First, SlotIndex Last,
2890b57cec5SDimitry Andric                            const MachineRegisterInfo *MRI,
2900b57cec5SDimitry Andric                            const LiveIntervals *LIS) {
2910b57cec5SDimitry Andric   for (MachineRegisterInfo::def_instr_iterator
2920b57cec5SDimitry Andric        UI = MRI->def_instr_begin(Reg),
2930b57cec5SDimitry Andric        UE = MRI->def_instr_end(); UI != UE; ++UI) {
2940b57cec5SDimitry Andric     const MachineInstr* MI = &*UI;
2950b57cec5SDimitry Andric     if (MI->isDebugValue())
2960b57cec5SDimitry Andric       continue;
2970b57cec5SDimitry Andric     SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
2980b57cec5SDimitry Andric     if (InstSlot >= First && InstSlot <= Last)
2990b57cec5SDimitry Andric       return true;
3000b57cec5SDimitry Andric   }
3010b57cec5SDimitry Andric   return false;
3020b57cec5SDimitry Andric }
3030b57cec5SDimitry Andric 
initRegPressure(MachineBasicBlock::iterator BeginBlock,MachineBasicBlock::iterator EndBlock)3040b57cec5SDimitry Andric void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
3050b57cec5SDimitry Andric                                       MachineBasicBlock::iterator EndBlock) {
3060b57cec5SDimitry Andric   IntervalPressure Pressure, BotPressure;
3070b57cec5SDimitry Andric   RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
3080b57cec5SDimitry Andric   LiveIntervals *LIS = DAG->getLIS();
3090b57cec5SDimitry Andric   MachineRegisterInfo *MRI = DAG->getMRI();
3100b57cec5SDimitry Andric   DAG->initRPTracker(TopRPTracker);
3110b57cec5SDimitry Andric   DAG->initRPTracker(BotRPTracker);
3120b57cec5SDimitry Andric   DAG->initRPTracker(RPTracker);
3130b57cec5SDimitry Andric 
3140b57cec5SDimitry Andric   // Goes though all SU. RPTracker captures what had to be alive for the SUs
3150b57cec5SDimitry Andric   // to execute, and what is still alive at the end.
3160b57cec5SDimitry Andric   for (SUnit* SU : ScheduledSUnits) {
3170b57cec5SDimitry Andric     RPTracker.setPos(SU->getInstr());
3180b57cec5SDimitry Andric     RPTracker.advance();
3190b57cec5SDimitry Andric   }
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric   // Close the RPTracker to finalize live ins/outs.
3220b57cec5SDimitry Andric   RPTracker.closeRegion();
3230b57cec5SDimitry Andric 
3240b57cec5SDimitry Andric   // Initialize the live ins and live outs.
3250b57cec5SDimitry Andric   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
3260b57cec5SDimitry Andric   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
3270b57cec5SDimitry Andric 
3280b57cec5SDimitry Andric   // Do not Track Physical Registers, because it messes up.
3290b57cec5SDimitry Andric   for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
330bdd1243dSDimitry Andric     if (RegMaskPair.RegUnit.isVirtual())
3310b57cec5SDimitry Andric       LiveInRegs.insert(RegMaskPair.RegUnit);
3320b57cec5SDimitry Andric   }
3330b57cec5SDimitry Andric   LiveOutRegs.clear();
3340b57cec5SDimitry Andric   // There is several possibilities to distinguish:
3350b57cec5SDimitry Andric   // 1) Reg is not input to any instruction in the block, but is output of one
3360b57cec5SDimitry Andric   // 2) 1) + read in the block and not needed after it
3370b57cec5SDimitry Andric   // 3) 1) + read in the block but needed in another block
3380b57cec5SDimitry Andric   // 4) Reg is input of an instruction but another block will read it too
3390b57cec5SDimitry Andric   // 5) Reg is input of an instruction and then rewritten in the block.
3400b57cec5SDimitry Andric   //    result is not read in the block (implies used in another block)
3410b57cec5SDimitry Andric   // 6) Reg is input of an instruction and then rewritten in the block.
3420b57cec5SDimitry Andric   //    result is read in the block and not needed in another block
3430b57cec5SDimitry Andric   // 7) Reg is input of an instruction and then rewritten in the block.
3440b57cec5SDimitry Andric   //    result is read in the block but also needed in another block
3450b57cec5SDimitry Andric   // LiveInRegs will contains all the regs in situation 4, 5, 6, 7
3460b57cec5SDimitry Andric   // We want LiveOutRegs to contain only Regs whose content will be read after
3470b57cec5SDimitry Andric   // in another block, and whose content was written in the current block,
3480b57cec5SDimitry Andric   // that is we want it to get 1, 3, 5, 7
3490b57cec5SDimitry Andric   // Since we made the MIs of a block to be packed all together before
3500b57cec5SDimitry Andric   // scheduling, then the LiveIntervals were correct, and the RPTracker was
3510b57cec5SDimitry Andric   // able to correctly handle 5 vs 6, 2 vs 3.
3520b57cec5SDimitry Andric   // (Note: This is not sufficient for RPTracker to not do mistakes for case 4)
3530b57cec5SDimitry Andric   // The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7
35481ad6265SDimitry Andric   // Comparing to LiveInRegs is not sufficient to differentiate 4 vs 5, 7
3550b57cec5SDimitry Andric   // The use of findDefBetween removes the case 4.
3560b57cec5SDimitry Andric   for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
357e8d8bef9SDimitry Andric     Register Reg = RegMaskPair.RegUnit;
358e8d8bef9SDimitry Andric     if (Reg.isVirtual() &&
3590b57cec5SDimitry Andric         isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
3600b57cec5SDimitry Andric                      LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
3610b57cec5SDimitry Andric                      LIS)) {
3620b57cec5SDimitry Andric       LiveOutRegs.insert(Reg);
3630b57cec5SDimitry Andric     }
3640b57cec5SDimitry Andric   }
3650b57cec5SDimitry Andric 
3660b57cec5SDimitry Andric   // Pressure = sum_alive_registers register size
3670b57cec5SDimitry Andric   // Internally llvm will represent some registers as big 128 bits registers
3680b57cec5SDimitry Andric   // for example, but they actually correspond to 4 actual 32 bits registers.
3690b57cec5SDimitry Andric   // Thus Pressure is not equal to num_alive_registers * constant.
3700b57cec5SDimitry Andric   LiveInPressure = TopPressure.MaxSetPressure;
3710b57cec5SDimitry Andric   LiveOutPressure = BotPressure.MaxSetPressure;
3720b57cec5SDimitry Andric 
3730b57cec5SDimitry Andric   // Prepares TopRPTracker for top down scheduling.
3740b57cec5SDimitry Andric   TopRPTracker.closeTop();
3750b57cec5SDimitry Andric }
3760b57cec5SDimitry Andric 
schedule(MachineBasicBlock::iterator BeginBlock,MachineBasicBlock::iterator EndBlock)3770b57cec5SDimitry Andric void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
3780b57cec5SDimitry Andric                                MachineBasicBlock::iterator EndBlock) {
3790b57cec5SDimitry Andric   if (!Scheduled)
3800b57cec5SDimitry Andric     fastSchedule();
3810b57cec5SDimitry Andric 
3820b57cec5SDimitry Andric   // PreScheduling phase to set LiveIn and LiveOut.
3830b57cec5SDimitry Andric   initRegPressure(BeginBlock, EndBlock);
3840b57cec5SDimitry Andric   undoSchedule();
3850b57cec5SDimitry Andric 
3860b57cec5SDimitry Andric   // Schedule for real now.
3870b57cec5SDimitry Andric 
3880b57cec5SDimitry Andric   TopReadySUs.clear();
3890b57cec5SDimitry Andric 
3900b57cec5SDimitry Andric   for (SUnit* SU : SUnits) {
3910b57cec5SDimitry Andric     if (!SU->NumPredsLeft)
3920b57cec5SDimitry Andric       TopReadySUs.push_back(SU);
3930b57cec5SDimitry Andric   }
3940b57cec5SDimitry Andric 
3950b57cec5SDimitry Andric   while (!TopReadySUs.empty()) {
3960b57cec5SDimitry Andric     SUnit *SU = pickNode();
3970b57cec5SDimitry Andric     ScheduledSUnits.push_back(SU);
3980b57cec5SDimitry Andric     TopRPTracker.setPos(SU->getInstr());
3990b57cec5SDimitry Andric     TopRPTracker.advance();
4000b57cec5SDimitry Andric     nodeScheduled(SU);
4010b57cec5SDimitry Andric   }
4020b57cec5SDimitry Andric 
40381ad6265SDimitry Andric   // TODO: compute InternalAdditionalPressure.
404349cc55cSDimitry Andric   InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size());
4050b57cec5SDimitry Andric 
4060b57cec5SDimitry Andric   // Check everything is right.
4070b57cec5SDimitry Andric #ifndef NDEBUG
4080b57cec5SDimitry Andric   assert(SUnits.size() == ScheduledSUnits.size() &&
4090b57cec5SDimitry Andric             TopReadySUs.empty());
4100b57cec5SDimitry Andric   for (SUnit* SU : SUnits) {
4110b57cec5SDimitry Andric     assert(SU->isScheduled &&
4120b57cec5SDimitry Andric               SU->NumPredsLeft == 0);
4130b57cec5SDimitry Andric   }
4140b57cec5SDimitry Andric #endif
4150b57cec5SDimitry Andric 
4160b57cec5SDimitry Andric   Scheduled = true;
4170b57cec5SDimitry Andric }
4180b57cec5SDimitry Andric 
undoSchedule()4190b57cec5SDimitry Andric void SIScheduleBlock::undoSchedule() {
4200b57cec5SDimitry Andric   for (SUnit* SU : SUnits) {
4210b57cec5SDimitry Andric     SU->isScheduled = false;
4220b57cec5SDimitry Andric     for (SDep& Succ : SU->Succs) {
4230b57cec5SDimitry Andric       if (BC->isSUInBlock(Succ.getSUnit(), ID))
4240b57cec5SDimitry Andric         undoReleaseSucc(SU, &Succ);
4250b57cec5SDimitry Andric     }
4260b57cec5SDimitry Andric   }
4270b57cec5SDimitry Andric   HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
4280b57cec5SDimitry Andric   ScheduledSUnits.clear();
4290b57cec5SDimitry Andric   Scheduled = false;
4300b57cec5SDimitry Andric }
4310b57cec5SDimitry Andric 
undoReleaseSucc(SUnit * SU,SDep * SuccEdge)4320b57cec5SDimitry Andric void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
4330b57cec5SDimitry Andric   SUnit *SuccSU = SuccEdge->getSUnit();
4340b57cec5SDimitry Andric 
4350b57cec5SDimitry Andric   if (SuccEdge->isWeak()) {
4360b57cec5SDimitry Andric     ++SuccSU->WeakPredsLeft;
4370b57cec5SDimitry Andric     return;
4380b57cec5SDimitry Andric   }
4390b57cec5SDimitry Andric   ++SuccSU->NumPredsLeft;
4400b57cec5SDimitry Andric }
4410b57cec5SDimitry Andric 
releaseSucc(SUnit * SU,SDep * SuccEdge)4420b57cec5SDimitry Andric void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
4430b57cec5SDimitry Andric   SUnit *SuccSU = SuccEdge->getSUnit();
4440b57cec5SDimitry Andric 
4450b57cec5SDimitry Andric   if (SuccEdge->isWeak()) {
4460b57cec5SDimitry Andric     --SuccSU->WeakPredsLeft;
4470b57cec5SDimitry Andric     return;
4480b57cec5SDimitry Andric   }
4490b57cec5SDimitry Andric #ifndef NDEBUG
4500b57cec5SDimitry Andric   if (SuccSU->NumPredsLeft == 0) {
4510b57cec5SDimitry Andric     dbgs() << "*** Scheduling failed! ***\n";
4520b57cec5SDimitry Andric     DAG->dumpNode(*SuccSU);
4530b57cec5SDimitry Andric     dbgs() << " has been released too many times!\n";
4540b57cec5SDimitry Andric     llvm_unreachable(nullptr);
4550b57cec5SDimitry Andric   }
4560b57cec5SDimitry Andric #endif
4570b57cec5SDimitry Andric 
4580b57cec5SDimitry Andric   --SuccSU->NumPredsLeft;
4590b57cec5SDimitry Andric }
4600b57cec5SDimitry Andric 
4610b57cec5SDimitry Andric /// Release Successors of the SU that are in the block or not.
releaseSuccessors(SUnit * SU,bool InOrOutBlock)4620b57cec5SDimitry Andric void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
4630b57cec5SDimitry Andric   for (SDep& Succ : SU->Succs) {
4640b57cec5SDimitry Andric     SUnit *SuccSU = Succ.getSUnit();
4650b57cec5SDimitry Andric 
4660b57cec5SDimitry Andric     if (SuccSU->NodeNum >= DAG->SUnits.size())
4670b57cec5SDimitry Andric         continue;
4680b57cec5SDimitry Andric 
4690b57cec5SDimitry Andric     if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
4700b57cec5SDimitry Andric       continue;
4710b57cec5SDimitry Andric 
4720b57cec5SDimitry Andric     releaseSucc(SU, &Succ);
4730b57cec5SDimitry Andric     if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
4740b57cec5SDimitry Andric       TopReadySUs.push_back(SuccSU);
4750b57cec5SDimitry Andric   }
4760b57cec5SDimitry Andric }
4770b57cec5SDimitry Andric 
nodeScheduled(SUnit * SU)4780b57cec5SDimitry Andric void SIScheduleBlock::nodeScheduled(SUnit *SU) {
4790b57cec5SDimitry Andric   // Is in TopReadySUs
4800b57cec5SDimitry Andric   assert (!SU->NumPredsLeft);
4810b57cec5SDimitry Andric   std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
4820b57cec5SDimitry Andric   if (I == TopReadySUs.end()) {
4830b57cec5SDimitry Andric     dbgs() << "Data Structure Bug in SI Scheduler\n";
4840b57cec5SDimitry Andric     llvm_unreachable(nullptr);
4850b57cec5SDimitry Andric   }
4860b57cec5SDimitry Andric   TopReadySUs.erase(I);
4870b57cec5SDimitry Andric 
4880b57cec5SDimitry Andric   releaseSuccessors(SU, true);
4890b57cec5SDimitry Andric   // Scheduling this node will trigger a wait,
4900b57cec5SDimitry Andric   // thus propagate to other instructions that they do not need to wait either.
4910b57cec5SDimitry Andric   if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
4920b57cec5SDimitry Andric     HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
4930b57cec5SDimitry Andric 
4940b57cec5SDimitry Andric   if (DAG->IsLowLatencySU[SU->NodeNum]) {
4950b57cec5SDimitry Andric      for (SDep& Succ : SU->Succs) {
4960b57cec5SDimitry Andric       std::map<unsigned, unsigned>::iterator I =
4970b57cec5SDimitry Andric         NodeNum2Index.find(Succ.getSUnit()->NodeNum);
4980b57cec5SDimitry Andric       if (I != NodeNum2Index.end())
4990b57cec5SDimitry Andric         HasLowLatencyNonWaitedParent[I->second] = 1;
5000b57cec5SDimitry Andric     }
5010b57cec5SDimitry Andric   }
5020b57cec5SDimitry Andric   SU->isScheduled = true;
5030b57cec5SDimitry Andric }
5040b57cec5SDimitry Andric 
finalizeUnits()5050b57cec5SDimitry Andric void SIScheduleBlock::finalizeUnits() {
5060b57cec5SDimitry Andric   // We remove links from outside blocks to enable scheduling inside the block.
5070b57cec5SDimitry Andric   for (SUnit* SU : SUnits) {
5080b57cec5SDimitry Andric     releaseSuccessors(SU, false);
5090b57cec5SDimitry Andric     if (DAG->IsHighLatencySU[SU->NodeNum])
5100b57cec5SDimitry Andric       HighLatencyBlock = true;
5110b57cec5SDimitry Andric   }
5120b57cec5SDimitry Andric   HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
5130b57cec5SDimitry Andric }
5140b57cec5SDimitry Andric 
5150b57cec5SDimitry Andric // we maintain ascending order of IDs
addPred(SIScheduleBlock * Pred)5160b57cec5SDimitry Andric void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
5170b57cec5SDimitry Andric   unsigned PredID = Pred->getID();
5180b57cec5SDimitry Andric 
5190b57cec5SDimitry Andric   // Check if not already predecessor.
5200b57cec5SDimitry Andric   for (SIScheduleBlock* P : Preds) {
5210b57cec5SDimitry Andric     if (PredID == P->getID())
5220b57cec5SDimitry Andric       return;
5230b57cec5SDimitry Andric   }
5240b57cec5SDimitry Andric   Preds.push_back(Pred);
5250b57cec5SDimitry Andric 
5260b57cec5SDimitry Andric   assert(none_of(Succs,
5270b57cec5SDimitry Andric                  [=](std::pair<SIScheduleBlock*,
5280b57cec5SDimitry Andric                      SIScheduleBlockLinkKind> S) {
5290b57cec5SDimitry Andric                    return PredID == S.first->getID();
5300b57cec5SDimitry Andric                     }) &&
5310b57cec5SDimitry Andric          "Loop in the Block Graph!");
5320b57cec5SDimitry Andric }
5330b57cec5SDimitry Andric 
addSucc(SIScheduleBlock * Succ,SIScheduleBlockLinkKind Kind)5340b57cec5SDimitry Andric void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
5350b57cec5SDimitry Andric                               SIScheduleBlockLinkKind Kind) {
5360b57cec5SDimitry Andric   unsigned SuccID = Succ->getID();
5370b57cec5SDimitry Andric 
5380b57cec5SDimitry Andric   // Check if not already predecessor.
5390b57cec5SDimitry Andric   for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
5400b57cec5SDimitry Andric     if (SuccID == S.first->getID()) {
5410b57cec5SDimitry Andric       if (S.second == SIScheduleBlockLinkKind::NoData &&
5420b57cec5SDimitry Andric           Kind == SIScheduleBlockLinkKind::Data)
5430b57cec5SDimitry Andric         S.second = Kind;
5440b57cec5SDimitry Andric       return;
5450b57cec5SDimitry Andric     }
5460b57cec5SDimitry Andric   }
5470b57cec5SDimitry Andric   if (Succ->isHighLatencyBlock())
5480b57cec5SDimitry Andric     ++NumHighLatencySuccessors;
549*0fca6ea1SDimitry Andric   Succs.emplace_back(Succ, Kind);
5500b57cec5SDimitry Andric 
5510b57cec5SDimitry Andric   assert(none_of(Preds,
5520b57cec5SDimitry Andric                  [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&
5530b57cec5SDimitry Andric          "Loop in the Block Graph!");
5540b57cec5SDimitry Andric }
5550b57cec5SDimitry Andric 
5560b57cec5SDimitry Andric #ifndef NDEBUG
printDebug(bool full)5570b57cec5SDimitry Andric void SIScheduleBlock::printDebug(bool full) {
5580b57cec5SDimitry Andric   dbgs() << "Block (" << ID << ")\n";
5590b57cec5SDimitry Andric   if (!full)
5600b57cec5SDimitry Andric     return;
5610b57cec5SDimitry Andric 
5620b57cec5SDimitry Andric   dbgs() << "\nContains High Latency Instruction: "
5630b57cec5SDimitry Andric          << HighLatencyBlock << '\n';
5640b57cec5SDimitry Andric   dbgs() << "\nDepends On:\n";
5650b57cec5SDimitry Andric   for (SIScheduleBlock* P : Preds) {
5660b57cec5SDimitry Andric     P->printDebug(false);
5670b57cec5SDimitry Andric   }
5680b57cec5SDimitry Andric 
5690b57cec5SDimitry Andric   dbgs() << "\nSuccessors:\n";
5700b57cec5SDimitry Andric   for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
5710b57cec5SDimitry Andric     if (S.second == SIScheduleBlockLinkKind::Data)
5720b57cec5SDimitry Andric       dbgs() << "(Data Dep) ";
5730b57cec5SDimitry Andric     S.first->printDebug(false);
5740b57cec5SDimitry Andric   }
5750b57cec5SDimitry Andric 
5760b57cec5SDimitry Andric   if (Scheduled) {
5775ffd83dbSDimitry Andric     dbgs() << "LiveInPressure "
5785ffd83dbSDimitry Andric            << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
5795ffd83dbSDimitry Andric            << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n';
5805ffd83dbSDimitry Andric     dbgs() << "LiveOutPressure "
5815ffd83dbSDimitry Andric            << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
5825ffd83dbSDimitry Andric            << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n";
5830b57cec5SDimitry Andric     dbgs() << "LiveIns:\n";
5840b57cec5SDimitry Andric     for (unsigned Reg : LiveInRegs)
5850b57cec5SDimitry Andric       dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
5860b57cec5SDimitry Andric 
5870b57cec5SDimitry Andric     dbgs() << "\nLiveOuts:\n";
5880b57cec5SDimitry Andric     for (unsigned Reg : LiveOutRegs)
5890b57cec5SDimitry Andric       dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
5900b57cec5SDimitry Andric   }
5910b57cec5SDimitry Andric 
5920b57cec5SDimitry Andric   dbgs() << "\nInstructions:\n";
5930b57cec5SDimitry Andric   for (const SUnit* SU : SUnits)
5940b57cec5SDimitry Andric       DAG->dumpNode(*SU);
5950b57cec5SDimitry Andric 
5960b57cec5SDimitry Andric   dbgs() << "///////////////////////\n";
5970b57cec5SDimitry Andric }
5980b57cec5SDimitry Andric #endif
5990b57cec5SDimitry Andric 
6000b57cec5SDimitry Andric // SIScheduleBlockCreator //
6010b57cec5SDimitry Andric 
SIScheduleBlockCreator(SIScheduleDAGMI * DAG)602480093f4SDimitry Andric SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
603480093f4SDimitry Andric     : DAG(DAG) {}
6040b57cec5SDimitry Andric 
6050b57cec5SDimitry Andric SIScheduleBlocks
getBlocks(SISchedulerBlockCreatorVariant BlockVariant)6060b57cec5SDimitry Andric SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
6070b57cec5SDimitry Andric   std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
6080b57cec5SDimitry Andric     Blocks.find(BlockVariant);
6090b57cec5SDimitry Andric   if (B == Blocks.end()) {
6100b57cec5SDimitry Andric     SIScheduleBlocks Res;
6110b57cec5SDimitry Andric     createBlocksForVariant(BlockVariant);
6120b57cec5SDimitry Andric     topologicalSort();
6130b57cec5SDimitry Andric     scheduleInsideBlocks();
6140b57cec5SDimitry Andric     fillStats();
6150b57cec5SDimitry Andric     Res.Blocks = CurrentBlocks;
6160b57cec5SDimitry Andric     Res.TopDownIndex2Block = TopDownIndex2Block;
6170b57cec5SDimitry Andric     Res.TopDownBlock2Index = TopDownBlock2Index;
6180b57cec5SDimitry Andric     Blocks[BlockVariant] = Res;
6190b57cec5SDimitry Andric     return Res;
6200b57cec5SDimitry Andric   }
621*0fca6ea1SDimitry Andric   return B->second;
6220b57cec5SDimitry Andric }
6230b57cec5SDimitry Andric 
isSUInBlock(SUnit * SU,unsigned ID)6240b57cec5SDimitry Andric bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
6250b57cec5SDimitry Andric   if (SU->NodeNum >= DAG->SUnits.size())
6260b57cec5SDimitry Andric     return false;
6270b57cec5SDimitry Andric   return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
6280b57cec5SDimitry Andric }
6290b57cec5SDimitry Andric 
colorHighLatenciesAlone()6300b57cec5SDimitry Andric void SIScheduleBlockCreator::colorHighLatenciesAlone() {
6310b57cec5SDimitry Andric   unsigned DAGSize = DAG->SUnits.size();
6320b57cec5SDimitry Andric 
6330b57cec5SDimitry Andric   for (unsigned i = 0, e = DAGSize; i != e; ++i) {
6340b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[i];
6350b57cec5SDimitry Andric     if (DAG->IsHighLatencySU[SU->NodeNum]) {
6360b57cec5SDimitry Andric       CurrentColoring[SU->NodeNum] = NextReservedID++;
6370b57cec5SDimitry Andric     }
6380b57cec5SDimitry Andric   }
6390b57cec5SDimitry Andric }
6400b57cec5SDimitry Andric 
6410b57cec5SDimitry Andric static bool
hasDataDependencyPred(const SUnit & SU,const SUnit & FromSU)6420b57cec5SDimitry Andric hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
6430b57cec5SDimitry Andric   for (const auto &PredDep : SU.Preds) {
6440b57cec5SDimitry Andric     if (PredDep.getSUnit() == &FromSU &&
6450b57cec5SDimitry Andric         PredDep.getKind() == llvm::SDep::Data)
6460b57cec5SDimitry Andric       return true;
6470b57cec5SDimitry Andric   }
6480b57cec5SDimitry Andric   return false;
6490b57cec5SDimitry Andric }
6500b57cec5SDimitry Andric 
colorHighLatenciesGroups()6510b57cec5SDimitry Andric void SIScheduleBlockCreator::colorHighLatenciesGroups() {
6520b57cec5SDimitry Andric   unsigned DAGSize = DAG->SUnits.size();
6530b57cec5SDimitry Andric   unsigned NumHighLatencies = 0;
6540b57cec5SDimitry Andric   unsigned GroupSize;
6550b57cec5SDimitry Andric   int Color = NextReservedID;
6560b57cec5SDimitry Andric   unsigned Count = 0;
6570b57cec5SDimitry Andric   std::set<unsigned> FormingGroup;
6580b57cec5SDimitry Andric 
6590b57cec5SDimitry Andric   for (unsigned i = 0, e = DAGSize; i != e; ++i) {
6600b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[i];
6610b57cec5SDimitry Andric     if (DAG->IsHighLatencySU[SU->NodeNum])
6620b57cec5SDimitry Andric       ++NumHighLatencies;
6630b57cec5SDimitry Andric   }
6640b57cec5SDimitry Andric 
6650b57cec5SDimitry Andric   if (NumHighLatencies == 0)
6660b57cec5SDimitry Andric     return;
6670b57cec5SDimitry Andric 
6680b57cec5SDimitry Andric   if (NumHighLatencies <= 6)
6690b57cec5SDimitry Andric     GroupSize = 2;
6700b57cec5SDimitry Andric   else if (NumHighLatencies <= 12)
6710b57cec5SDimitry Andric     GroupSize = 3;
6720b57cec5SDimitry Andric   else
6730b57cec5SDimitry Andric     GroupSize = 4;
6740b57cec5SDimitry Andric 
6750b57cec5SDimitry Andric   for (unsigned SUNum : DAG->TopDownIndex2SU) {
6760b57cec5SDimitry Andric     const SUnit &SU = DAG->SUnits[SUNum];
6770b57cec5SDimitry Andric     if (DAG->IsHighLatencySU[SU.NodeNum]) {
6780b57cec5SDimitry Andric       unsigned CompatibleGroup = true;
6790b57cec5SDimitry Andric       int ProposedColor = Color;
6800b57cec5SDimitry Andric       std::vector<int> AdditionalElements;
6810b57cec5SDimitry Andric 
6820b57cec5SDimitry Andric       // We don't want to put in the same block
6830b57cec5SDimitry Andric       // two high latency instructions that depend
6840b57cec5SDimitry Andric       // on each other.
6850b57cec5SDimitry Andric       // One way would be to check canAddEdge
6860b57cec5SDimitry Andric       // in both directions, but that currently is not
6870b57cec5SDimitry Andric       // enough because there the high latency order is
6880b57cec5SDimitry Andric       // enforced (via links).
6890b57cec5SDimitry Andric       // Instead, look at the dependencies between the
6900b57cec5SDimitry Andric       // high latency instructions and deduce if it is
6910b57cec5SDimitry Andric       // a data dependency or not.
6920b57cec5SDimitry Andric       for (unsigned j : FormingGroup) {
6930b57cec5SDimitry Andric         bool HasSubGraph;
6940b57cec5SDimitry Andric         std::vector<int> SubGraph;
6950b57cec5SDimitry Andric         // By construction (topological order), if SU and
69681ad6265SDimitry Andric         // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessary
6970b57cec5SDimitry Andric         // in the parent graph of SU.
6980b57cec5SDimitry Andric #ifndef NDEBUG
6990b57cec5SDimitry Andric         SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
7000b57cec5SDimitry Andric                                                HasSubGraph);
7010b57cec5SDimitry Andric         assert(!HasSubGraph);
7020b57cec5SDimitry Andric #endif
7030b57cec5SDimitry Andric         SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
7040b57cec5SDimitry Andric                                                HasSubGraph);
7050b57cec5SDimitry Andric         if (!HasSubGraph)
7060b57cec5SDimitry Andric           continue; // No dependencies between each other
707*0fca6ea1SDimitry Andric         if (SubGraph.size() > 5) {
7080b57cec5SDimitry Andric           // Too many elements would be required to be added to the block.
7090b57cec5SDimitry Andric           CompatibleGroup = false;
7100b57cec5SDimitry Andric           break;
7110b57cec5SDimitry Andric         }
7120b57cec5SDimitry Andric         // Check the type of dependency
7130b57cec5SDimitry Andric         for (unsigned k : SubGraph) {
7140b57cec5SDimitry Andric           // If in the path to join the two instructions,
7150b57cec5SDimitry Andric           // there is another high latency instruction,
7160b57cec5SDimitry Andric           // or instructions colored for another block
7170b57cec5SDimitry Andric           // abort the merge.
718*0fca6ea1SDimitry Andric           if (DAG->IsHighLatencySU[k] || (CurrentColoring[k] != ProposedColor &&
7190b57cec5SDimitry Andric                                           CurrentColoring[k] != 0)) {
7200b57cec5SDimitry Andric             CompatibleGroup = false;
7210b57cec5SDimitry Andric             break;
7220b57cec5SDimitry Andric           }
7230b57cec5SDimitry Andric           // If one of the SU in the subgraph depends on the result of SU j,
7240b57cec5SDimitry Andric           // there'll be a data dependency.
7250b57cec5SDimitry Andric           if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
7260b57cec5SDimitry Andric             CompatibleGroup = false;
7270b57cec5SDimitry Andric             break;
7280b57cec5SDimitry Andric           }
7290b57cec5SDimitry Andric         }
7300b57cec5SDimitry Andric         if (!CompatibleGroup)
7310b57cec5SDimitry Andric           break;
7320b57cec5SDimitry Andric         // Same check for the SU
7330b57cec5SDimitry Andric         if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
7340b57cec5SDimitry Andric           CompatibleGroup = false;
7350b57cec5SDimitry Andric           break;
7360b57cec5SDimitry Andric         }
7370b57cec5SDimitry Andric         // Add all the required instructions to the block
7380b57cec5SDimitry Andric         // These cannot live in another block (because they
7390b57cec5SDimitry Andric         // depend (order dependency) on one of the
7400b57cec5SDimitry Andric         // instruction in the block, and are required for the
7410b57cec5SDimitry Andric         // high latency instruction we add.
742e8d8bef9SDimitry Andric         llvm::append_range(AdditionalElements, SubGraph);
7430b57cec5SDimitry Andric       }
7440b57cec5SDimitry Andric       if (CompatibleGroup) {
7450b57cec5SDimitry Andric         FormingGroup.insert(SU.NodeNum);
7460b57cec5SDimitry Andric         for (unsigned j : AdditionalElements)
7470b57cec5SDimitry Andric           CurrentColoring[j] = ProposedColor;
7480b57cec5SDimitry Andric         CurrentColoring[SU.NodeNum] = ProposedColor;
7490b57cec5SDimitry Andric         ++Count;
7500b57cec5SDimitry Andric       }
7510b57cec5SDimitry Andric       // Found one incompatible instruction,
7520b57cec5SDimitry Andric       // or has filled a big enough group.
7530b57cec5SDimitry Andric       // -> start a new one.
7540b57cec5SDimitry Andric       if (!CompatibleGroup) {
7550b57cec5SDimitry Andric         FormingGroup.clear();
7560b57cec5SDimitry Andric         Color = ++NextReservedID;
7570b57cec5SDimitry Andric         ProposedColor = Color;
7580b57cec5SDimitry Andric         FormingGroup.insert(SU.NodeNum);
7590b57cec5SDimitry Andric         CurrentColoring[SU.NodeNum] = ProposedColor;
7600b57cec5SDimitry Andric         Count = 0;
7610b57cec5SDimitry Andric       } else if (Count == GroupSize) {
7620b57cec5SDimitry Andric         FormingGroup.clear();
7630b57cec5SDimitry Andric         Color = ++NextReservedID;
7640b57cec5SDimitry Andric         ProposedColor = Color;
7650b57cec5SDimitry Andric         Count = 0;
7660b57cec5SDimitry Andric       }
7670b57cec5SDimitry Andric     }
7680b57cec5SDimitry Andric   }
7690b57cec5SDimitry Andric }
7700b57cec5SDimitry Andric 
colorComputeReservedDependencies()7710b57cec5SDimitry Andric void SIScheduleBlockCreator::colorComputeReservedDependencies() {
7720b57cec5SDimitry Andric   unsigned DAGSize = DAG->SUnits.size();
7730b57cec5SDimitry Andric   std::map<std::set<unsigned>, unsigned> ColorCombinations;
7740b57cec5SDimitry Andric 
7750b57cec5SDimitry Andric   CurrentTopDownReservedDependencyColoring.clear();
7760b57cec5SDimitry Andric   CurrentBottomUpReservedDependencyColoring.clear();
7770b57cec5SDimitry Andric 
7780b57cec5SDimitry Andric   CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
7790b57cec5SDimitry Andric   CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
7800b57cec5SDimitry Andric 
7810b57cec5SDimitry Andric   // Traverse TopDown, and give different colors to SUs depending
7820b57cec5SDimitry Andric   // on which combination of High Latencies they depend on.
7830b57cec5SDimitry Andric 
7840b57cec5SDimitry Andric   for (unsigned SUNum : DAG->TopDownIndex2SU) {
7850b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[SUNum];
7860b57cec5SDimitry Andric     std::set<unsigned> SUColors;
7870b57cec5SDimitry Andric 
7880b57cec5SDimitry Andric     // Already given.
7890b57cec5SDimitry Andric     if (CurrentColoring[SU->NodeNum]) {
7900b57cec5SDimitry Andric       CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
7910b57cec5SDimitry Andric         CurrentColoring[SU->NodeNum];
7920b57cec5SDimitry Andric       continue;
7930b57cec5SDimitry Andric     }
7940b57cec5SDimitry Andric 
7950b57cec5SDimitry Andric    for (SDep& PredDep : SU->Preds) {
7960b57cec5SDimitry Andric       SUnit *Pred = PredDep.getSUnit();
7970b57cec5SDimitry Andric       if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
7980b57cec5SDimitry Andric         continue;
7990b57cec5SDimitry Andric       if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
8000b57cec5SDimitry Andric         SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
8010b57cec5SDimitry Andric     }
8020b57cec5SDimitry Andric     // Color 0 by default.
8030b57cec5SDimitry Andric     if (SUColors.empty())
8040b57cec5SDimitry Andric       continue;
8050b57cec5SDimitry Andric     // Same color than parents.
8060b57cec5SDimitry Andric     if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
8070b57cec5SDimitry Andric       CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
8080b57cec5SDimitry Andric         *SUColors.begin();
8090b57cec5SDimitry Andric     else {
8100b57cec5SDimitry Andric       std::map<std::set<unsigned>, unsigned>::iterator Pos =
8110b57cec5SDimitry Andric         ColorCombinations.find(SUColors);
8120b57cec5SDimitry Andric       if (Pos != ColorCombinations.end()) {
8130b57cec5SDimitry Andric           CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
8140b57cec5SDimitry Andric       } else {
8150b57cec5SDimitry Andric         CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
8160b57cec5SDimitry Andric           NextNonReservedID;
8170b57cec5SDimitry Andric         ColorCombinations[SUColors] = NextNonReservedID++;
8180b57cec5SDimitry Andric       }
8190b57cec5SDimitry Andric     }
8200b57cec5SDimitry Andric   }
8210b57cec5SDimitry Andric 
8220b57cec5SDimitry Andric   ColorCombinations.clear();
8230b57cec5SDimitry Andric 
8240b57cec5SDimitry Andric   // Same as before, but BottomUp.
8250b57cec5SDimitry Andric 
8260b57cec5SDimitry Andric   for (unsigned SUNum : DAG->BottomUpIndex2SU) {
8270b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[SUNum];
8280b57cec5SDimitry Andric     std::set<unsigned> SUColors;
8290b57cec5SDimitry Andric 
8300b57cec5SDimitry Andric     // Already given.
8310b57cec5SDimitry Andric     if (CurrentColoring[SU->NodeNum]) {
8320b57cec5SDimitry Andric       CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
8330b57cec5SDimitry Andric         CurrentColoring[SU->NodeNum];
8340b57cec5SDimitry Andric       continue;
8350b57cec5SDimitry Andric     }
8360b57cec5SDimitry Andric 
8370b57cec5SDimitry Andric     for (SDep& SuccDep : SU->Succs) {
8380b57cec5SDimitry Andric       SUnit *Succ = SuccDep.getSUnit();
8390b57cec5SDimitry Andric       if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
8400b57cec5SDimitry Andric         continue;
8410b57cec5SDimitry Andric       if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
8420b57cec5SDimitry Andric         SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
8430b57cec5SDimitry Andric     }
8440b57cec5SDimitry Andric     // Keep color 0.
8450b57cec5SDimitry Andric     if (SUColors.empty())
8460b57cec5SDimitry Andric       continue;
8470b57cec5SDimitry Andric     // Same color than parents.
8480b57cec5SDimitry Andric     if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
8490b57cec5SDimitry Andric       CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
8500b57cec5SDimitry Andric         *SUColors.begin();
8510b57cec5SDimitry Andric     else {
8520b57cec5SDimitry Andric       std::map<std::set<unsigned>, unsigned>::iterator Pos =
8530b57cec5SDimitry Andric         ColorCombinations.find(SUColors);
8540b57cec5SDimitry Andric       if (Pos != ColorCombinations.end()) {
8550b57cec5SDimitry Andric         CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
8560b57cec5SDimitry Andric       } else {
8570b57cec5SDimitry Andric         CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
8580b57cec5SDimitry Andric           NextNonReservedID;
8590b57cec5SDimitry Andric         ColorCombinations[SUColors] = NextNonReservedID++;
8600b57cec5SDimitry Andric       }
8610b57cec5SDimitry Andric     }
8620b57cec5SDimitry Andric   }
8630b57cec5SDimitry Andric }
8640b57cec5SDimitry Andric 
colorAccordingToReservedDependencies()8650b57cec5SDimitry Andric void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
8660b57cec5SDimitry Andric   std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;
8670b57cec5SDimitry Andric 
8680b57cec5SDimitry Andric   // Every combination of colors given by the top down
8690b57cec5SDimitry Andric   // and bottom up Reserved node dependency
8700b57cec5SDimitry Andric 
8710eae32dcSDimitry Andric   for (const SUnit &SU : DAG->SUnits) {
8720b57cec5SDimitry Andric     std::pair<unsigned, unsigned> SUColors;
8730b57cec5SDimitry Andric 
8740b57cec5SDimitry Andric     // High latency instructions: already given.
8750eae32dcSDimitry Andric     if (CurrentColoring[SU.NodeNum])
8760b57cec5SDimitry Andric       continue;
8770b57cec5SDimitry Andric 
8780eae32dcSDimitry Andric     SUColors.first = CurrentTopDownReservedDependencyColoring[SU.NodeNum];
8790eae32dcSDimitry Andric     SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.NodeNum];
8800b57cec5SDimitry Andric 
8810b57cec5SDimitry Andric     std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
8820b57cec5SDimitry Andric       ColorCombinations.find(SUColors);
8830b57cec5SDimitry Andric     if (Pos != ColorCombinations.end()) {
8840eae32dcSDimitry Andric       CurrentColoring[SU.NodeNum] = Pos->second;
8850b57cec5SDimitry Andric     } else {
8860eae32dcSDimitry Andric       CurrentColoring[SU.NodeNum] = NextNonReservedID;
8870b57cec5SDimitry Andric       ColorCombinations[SUColors] = NextNonReservedID++;
8880b57cec5SDimitry Andric     }
8890b57cec5SDimitry Andric   }
8900b57cec5SDimitry Andric }
8910b57cec5SDimitry Andric 
colorEndsAccordingToDependencies()8920b57cec5SDimitry Andric void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
8930b57cec5SDimitry Andric   unsigned DAGSize = DAG->SUnits.size();
8940b57cec5SDimitry Andric   std::vector<int> PendingColoring = CurrentColoring;
8950b57cec5SDimitry Andric 
8960b57cec5SDimitry Andric   assert(DAGSize >= 1 &&
8970b57cec5SDimitry Andric          CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
8980b57cec5SDimitry Andric          CurrentTopDownReservedDependencyColoring.size() == DAGSize);
8990b57cec5SDimitry Andric   // If there is no reserved block at all, do nothing. We don't want
9000b57cec5SDimitry Andric   // everything in one block.
901*0fca6ea1SDimitry Andric   if (*llvm::max_element(CurrentBottomUpReservedDependencyColoring) == 0 &&
902*0fca6ea1SDimitry Andric       *llvm::max_element(CurrentTopDownReservedDependencyColoring) == 0)
9030b57cec5SDimitry Andric     return;
9040b57cec5SDimitry Andric 
9050b57cec5SDimitry Andric   for (unsigned SUNum : DAG->BottomUpIndex2SU) {
9060b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[SUNum];
9070b57cec5SDimitry Andric     std::set<unsigned> SUColors;
9080b57cec5SDimitry Andric     std::set<unsigned> SUColorsPending;
9090b57cec5SDimitry Andric 
9100b57cec5SDimitry Andric     if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
9110b57cec5SDimitry Andric       continue;
9120b57cec5SDimitry Andric 
9130b57cec5SDimitry Andric     if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||
9140b57cec5SDimitry Andric         CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
9150b57cec5SDimitry Andric       continue;
9160b57cec5SDimitry Andric 
9170b57cec5SDimitry Andric     for (SDep& SuccDep : SU->Succs) {
9180b57cec5SDimitry Andric       SUnit *Succ = SuccDep.getSUnit();
9190b57cec5SDimitry Andric       if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
9200b57cec5SDimitry Andric         continue;
9210b57cec5SDimitry Andric       if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||
9220b57cec5SDimitry Andric           CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
9230b57cec5SDimitry Andric         SUColors.insert(CurrentColoring[Succ->NodeNum]);
9240b57cec5SDimitry Andric       SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
9250b57cec5SDimitry Andric     }
9260b57cec5SDimitry Andric     // If there is only one child/parent block, and that block
9270b57cec5SDimitry Andric     // is not among the ones we are removing in this path, then
9280b57cec5SDimitry Andric     // merge the instruction to that block
9290b57cec5SDimitry Andric     if (SUColors.size() == 1 && SUColorsPending.size() == 1)
9300b57cec5SDimitry Andric       PendingColoring[SU->NodeNum] = *SUColors.begin();
9310b57cec5SDimitry Andric     else // TODO: Attribute new colors depending on color
9320b57cec5SDimitry Andric          // combination of children.
9330b57cec5SDimitry Andric       PendingColoring[SU->NodeNum] = NextNonReservedID++;
9340b57cec5SDimitry Andric   }
9350b57cec5SDimitry Andric   CurrentColoring = PendingColoring;
9360b57cec5SDimitry Andric }
9370b57cec5SDimitry Andric 
9380b57cec5SDimitry Andric 
colorForceConsecutiveOrderInGroup()9390b57cec5SDimitry Andric void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
9400b57cec5SDimitry Andric   unsigned DAGSize = DAG->SUnits.size();
9410b57cec5SDimitry Andric   unsigned PreviousColor;
9420b57cec5SDimitry Andric   std::set<unsigned> SeenColors;
9430b57cec5SDimitry Andric 
9440b57cec5SDimitry Andric   if (DAGSize <= 1)
9450b57cec5SDimitry Andric     return;
9460b57cec5SDimitry Andric 
9470b57cec5SDimitry Andric   PreviousColor = CurrentColoring[0];
9480b57cec5SDimitry Andric 
9490b57cec5SDimitry Andric   for (unsigned i = 1, e = DAGSize; i != e; ++i) {
9500b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[i];
9510b57cec5SDimitry Andric     unsigned CurrentColor = CurrentColoring[i];
9520b57cec5SDimitry Andric     unsigned PreviousColorSave = PreviousColor;
9530b57cec5SDimitry Andric     assert(i == SU->NodeNum);
9540b57cec5SDimitry Andric 
9550b57cec5SDimitry Andric     if (CurrentColor != PreviousColor)
9560b57cec5SDimitry Andric       SeenColors.insert(PreviousColor);
9570b57cec5SDimitry Andric     PreviousColor = CurrentColor;
9580b57cec5SDimitry Andric 
9590b57cec5SDimitry Andric     if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
9600b57cec5SDimitry Andric       continue;
9610b57cec5SDimitry Andric 
9620b57cec5SDimitry Andric     if (SeenColors.find(CurrentColor) == SeenColors.end())
9630b57cec5SDimitry Andric       continue;
9640b57cec5SDimitry Andric 
9650b57cec5SDimitry Andric     if (PreviousColorSave != CurrentColor)
9660b57cec5SDimitry Andric       CurrentColoring[i] = NextNonReservedID++;
9670b57cec5SDimitry Andric     else
9680b57cec5SDimitry Andric       CurrentColoring[i] = CurrentColoring[i-1];
9690b57cec5SDimitry Andric   }
9700b57cec5SDimitry Andric }
9710b57cec5SDimitry Andric 
colorMergeConstantLoadsNextGroup()9720b57cec5SDimitry Andric void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
9730b57cec5SDimitry Andric   unsigned DAGSize = DAG->SUnits.size();
9740b57cec5SDimitry Andric 
9750b57cec5SDimitry Andric   for (unsigned SUNum : DAG->BottomUpIndex2SU) {
9760b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[SUNum];
9770b57cec5SDimitry Andric     std::set<unsigned> SUColors;
9780b57cec5SDimitry Andric 
9790b57cec5SDimitry Andric     if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
9800b57cec5SDimitry Andric       continue;
9810b57cec5SDimitry Andric 
9820b57cec5SDimitry Andric     // No predecessor: Vgpr constant loading.
9830b57cec5SDimitry Andric     // Low latency instructions usually have a predecessor (the address)
9840b57cec5SDimitry Andric     if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
9850b57cec5SDimitry Andric       continue;
9860b57cec5SDimitry Andric 
9870b57cec5SDimitry Andric     for (SDep& SuccDep : SU->Succs) {
9880b57cec5SDimitry Andric       SUnit *Succ = SuccDep.getSUnit();
9890b57cec5SDimitry Andric       if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
9900b57cec5SDimitry Andric         continue;
9910b57cec5SDimitry Andric       SUColors.insert(CurrentColoring[Succ->NodeNum]);
9920b57cec5SDimitry Andric     }
9930b57cec5SDimitry Andric     if (SUColors.size() == 1)
9940b57cec5SDimitry Andric       CurrentColoring[SU->NodeNum] = *SUColors.begin();
9950b57cec5SDimitry Andric   }
9960b57cec5SDimitry Andric }
9970b57cec5SDimitry Andric 
colorMergeIfPossibleNextGroup()9980b57cec5SDimitry Andric void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
9990b57cec5SDimitry Andric   unsigned DAGSize = DAG->SUnits.size();
10000b57cec5SDimitry Andric 
10010b57cec5SDimitry Andric   for (unsigned SUNum : DAG->BottomUpIndex2SU) {
10020b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[SUNum];
10030b57cec5SDimitry Andric     std::set<unsigned> SUColors;
10040b57cec5SDimitry Andric 
10050b57cec5SDimitry Andric     if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
10060b57cec5SDimitry Andric       continue;
10070b57cec5SDimitry Andric 
10080b57cec5SDimitry Andric     for (SDep& SuccDep : SU->Succs) {
10090b57cec5SDimitry Andric        SUnit *Succ = SuccDep.getSUnit();
10100b57cec5SDimitry Andric       if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
10110b57cec5SDimitry Andric         continue;
10120b57cec5SDimitry Andric       SUColors.insert(CurrentColoring[Succ->NodeNum]);
10130b57cec5SDimitry Andric     }
10140b57cec5SDimitry Andric     if (SUColors.size() == 1)
10150b57cec5SDimitry Andric       CurrentColoring[SU->NodeNum] = *SUColors.begin();
10160b57cec5SDimitry Andric   }
10170b57cec5SDimitry Andric }
10180b57cec5SDimitry Andric 
colorMergeIfPossibleNextGroupOnlyForReserved()10190b57cec5SDimitry Andric void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
10200b57cec5SDimitry Andric   unsigned DAGSize = DAG->SUnits.size();
10210b57cec5SDimitry Andric 
10220b57cec5SDimitry Andric   for (unsigned SUNum : DAG->BottomUpIndex2SU) {
10230b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[SUNum];
10240b57cec5SDimitry Andric     std::set<unsigned> SUColors;
10250b57cec5SDimitry Andric 
10260b57cec5SDimitry Andric     if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
10270b57cec5SDimitry Andric       continue;
10280b57cec5SDimitry Andric 
10290b57cec5SDimitry Andric     for (SDep& SuccDep : SU->Succs) {
10300b57cec5SDimitry Andric        SUnit *Succ = SuccDep.getSUnit();
10310b57cec5SDimitry Andric       if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
10320b57cec5SDimitry Andric         continue;
10330b57cec5SDimitry Andric       SUColors.insert(CurrentColoring[Succ->NodeNum]);
10340b57cec5SDimitry Andric     }
10350b57cec5SDimitry Andric     if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
10360b57cec5SDimitry Andric       CurrentColoring[SU->NodeNum] = *SUColors.begin();
10370b57cec5SDimitry Andric   }
10380b57cec5SDimitry Andric }
10390b57cec5SDimitry Andric 
colorMergeIfPossibleSmallGroupsToNextGroup()10400b57cec5SDimitry Andric void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
10410b57cec5SDimitry Andric   unsigned DAGSize = DAG->SUnits.size();
10420b57cec5SDimitry Andric   std::map<unsigned, unsigned> ColorCount;
10430b57cec5SDimitry Andric 
10440b57cec5SDimitry Andric   for (unsigned SUNum : DAG->BottomUpIndex2SU) {
10450b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[SUNum];
10460b57cec5SDimitry Andric     unsigned color = CurrentColoring[SU->NodeNum];
10470b57cec5SDimitry Andric      ++ColorCount[color];
10480b57cec5SDimitry Andric   }
10490b57cec5SDimitry Andric 
10500b57cec5SDimitry Andric   for (unsigned SUNum : DAG->BottomUpIndex2SU) {
10510b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[SUNum];
10520b57cec5SDimitry Andric     unsigned color = CurrentColoring[SU->NodeNum];
10530b57cec5SDimitry Andric     std::set<unsigned> SUColors;
10540b57cec5SDimitry Andric 
10550b57cec5SDimitry Andric     if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
10560b57cec5SDimitry Andric       continue;
10570b57cec5SDimitry Andric 
10580b57cec5SDimitry Andric     if (ColorCount[color] > 1)
10590b57cec5SDimitry Andric       continue;
10600b57cec5SDimitry Andric 
10610b57cec5SDimitry Andric     for (SDep& SuccDep : SU->Succs) {
10620b57cec5SDimitry Andric        SUnit *Succ = SuccDep.getSUnit();
10630b57cec5SDimitry Andric       if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
10640b57cec5SDimitry Andric         continue;
10650b57cec5SDimitry Andric       SUColors.insert(CurrentColoring[Succ->NodeNum]);
10660b57cec5SDimitry Andric     }
10670b57cec5SDimitry Andric     if (SUColors.size() == 1 && *SUColors.begin() != color) {
10680b57cec5SDimitry Andric       --ColorCount[color];
10690b57cec5SDimitry Andric       CurrentColoring[SU->NodeNum] = *SUColors.begin();
10700b57cec5SDimitry Andric       ++ColorCount[*SUColors.begin()];
10710b57cec5SDimitry Andric     }
10720b57cec5SDimitry Andric   }
10730b57cec5SDimitry Andric }
10740b57cec5SDimitry Andric 
cutHugeBlocks()10750b57cec5SDimitry Andric void SIScheduleBlockCreator::cutHugeBlocks() {
10760b57cec5SDimitry Andric   // TODO
10770b57cec5SDimitry Andric }
10780b57cec5SDimitry Andric 
regroupNoUserInstructions()10790b57cec5SDimitry Andric void SIScheduleBlockCreator::regroupNoUserInstructions() {
10800b57cec5SDimitry Andric   unsigned DAGSize = DAG->SUnits.size();
10810b57cec5SDimitry Andric   int GroupID = NextNonReservedID++;
10820b57cec5SDimitry Andric 
10830b57cec5SDimitry Andric   for (unsigned SUNum : DAG->BottomUpIndex2SU) {
10840b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[SUNum];
10850b57cec5SDimitry Andric     bool hasSuccessor = false;
10860b57cec5SDimitry Andric 
10870b57cec5SDimitry Andric     if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
10880b57cec5SDimitry Andric       continue;
10890b57cec5SDimitry Andric 
10900b57cec5SDimitry Andric     for (SDep& SuccDep : SU->Succs) {
10910b57cec5SDimitry Andric        SUnit *Succ = SuccDep.getSUnit();
10920b57cec5SDimitry Andric       if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
10930b57cec5SDimitry Andric         continue;
10940b57cec5SDimitry Andric       hasSuccessor = true;
10950b57cec5SDimitry Andric     }
10960b57cec5SDimitry Andric     if (!hasSuccessor)
10970b57cec5SDimitry Andric       CurrentColoring[SU->NodeNum] = GroupID;
10980b57cec5SDimitry Andric   }
10990b57cec5SDimitry Andric }
11000b57cec5SDimitry Andric 
colorExports()11010b57cec5SDimitry Andric void SIScheduleBlockCreator::colorExports() {
11020b57cec5SDimitry Andric   unsigned ExportColor = NextNonReservedID++;
11030b57cec5SDimitry Andric   SmallVector<unsigned, 8> ExpGroup;
11040b57cec5SDimitry Andric 
11050b57cec5SDimitry Andric   // Put all exports together in a block.
11060b57cec5SDimitry Andric   // The block will naturally end up being scheduled last,
11070b57cec5SDimitry Andric   // thus putting exports at the end of the schedule, which
11080b57cec5SDimitry Andric   // is better for performance.
11090b57cec5SDimitry Andric   // However we must ensure, for safety, the exports can be put
11100b57cec5SDimitry Andric   // together in the same block without any other instruction.
11110b57cec5SDimitry Andric   // This could happen, for example, when scheduling after regalloc
11120b57cec5SDimitry Andric   // if reloading a spilled register from memory using the same
11130b57cec5SDimitry Andric   // register than used in a previous export.
11140b57cec5SDimitry Andric   // If that happens, do not regroup the exports.
11150b57cec5SDimitry Andric   for (unsigned SUNum : DAG->TopDownIndex2SU) {
11160b57cec5SDimitry Andric     const SUnit &SU = DAG->SUnits[SUNum];
11170b57cec5SDimitry Andric     if (SIInstrInfo::isEXP(*SU.getInstr())) {
111881ad6265SDimitry Andric       // SU is an export instruction. Check whether one of its successor
111981ad6265SDimitry Andric       // dependencies is a non-export, in which case we skip export grouping.
112081ad6265SDimitry Andric       for (const SDep &SuccDep : SU.Succs) {
112181ad6265SDimitry Andric         const SUnit *SuccSU = SuccDep.getSUnit();
112281ad6265SDimitry Andric         if (SuccDep.isWeak() || SuccSU->NodeNum >= DAG->SUnits.size()) {
112381ad6265SDimitry Andric           // Ignore these dependencies.
112481ad6265SDimitry Andric           continue;
112581ad6265SDimitry Andric         }
112681ad6265SDimitry Andric         assert(SuccSU->isInstr() &&
112781ad6265SDimitry Andric                "SUnit unexpectedly not representing an instruction!");
11280b57cec5SDimitry Andric 
112981ad6265SDimitry Andric         if (!SIInstrInfo::isEXP(*SuccSU->getInstr())) {
113081ad6265SDimitry Andric           // A non-export depends on us. Skip export grouping.
113181ad6265SDimitry Andric           // Note that this is a bit pessimistic: We could still group all other
113281ad6265SDimitry Andric           // exports that are not depended on by non-exports, directly or
113381ad6265SDimitry Andric           // indirectly. Simply skipping this particular export but grouping all
113481ad6265SDimitry Andric           // others would not account for indirect dependencies.
11350b57cec5SDimitry Andric           return;
11360b57cec5SDimitry Andric         }
11370b57cec5SDimitry Andric       }
11380b57cec5SDimitry Andric       ExpGroup.push_back(SUNum);
11390b57cec5SDimitry Andric     }
11400b57cec5SDimitry Andric   }
11410b57cec5SDimitry Andric 
11420b57cec5SDimitry Andric   // The group can be formed. Give the color.
11430b57cec5SDimitry Andric   for (unsigned j : ExpGroup)
11440b57cec5SDimitry Andric     CurrentColoring[j] = ExportColor;
11450b57cec5SDimitry Andric }
11460b57cec5SDimitry Andric 
createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant)11470b57cec5SDimitry Andric void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
11480b57cec5SDimitry Andric   unsigned DAGSize = DAG->SUnits.size();
11490b57cec5SDimitry Andric   std::map<unsigned,unsigned> RealID;
11500b57cec5SDimitry Andric 
11510b57cec5SDimitry Andric   CurrentBlocks.clear();
11520b57cec5SDimitry Andric   CurrentColoring.clear();
11530b57cec5SDimitry Andric   CurrentColoring.resize(DAGSize, 0);
11540b57cec5SDimitry Andric   Node2CurrentBlock.clear();
11550b57cec5SDimitry Andric 
11560b57cec5SDimitry Andric   // Restore links previous scheduling variant has overridden.
11570b57cec5SDimitry Andric   DAG->restoreSULinksLeft();
11580b57cec5SDimitry Andric 
11590b57cec5SDimitry Andric   NextReservedID = 1;
11600b57cec5SDimitry Andric   NextNonReservedID = DAGSize + 1;
11610b57cec5SDimitry Andric 
11620b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Coloring the graph\n");
11630b57cec5SDimitry Andric 
11640b57cec5SDimitry Andric   if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
11650b57cec5SDimitry Andric     colorHighLatenciesGroups();
11660b57cec5SDimitry Andric   else
11670b57cec5SDimitry Andric     colorHighLatenciesAlone();
11680b57cec5SDimitry Andric   colorComputeReservedDependencies();
11690b57cec5SDimitry Andric   colorAccordingToReservedDependencies();
11700b57cec5SDimitry Andric   colorEndsAccordingToDependencies();
11710b57cec5SDimitry Andric   if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
11720b57cec5SDimitry Andric     colorForceConsecutiveOrderInGroup();
11730b57cec5SDimitry Andric   regroupNoUserInstructions();
11740b57cec5SDimitry Andric   colorMergeConstantLoadsNextGroup();
11750b57cec5SDimitry Andric   colorMergeIfPossibleNextGroupOnlyForReserved();
11760b57cec5SDimitry Andric   colorExports();
11770b57cec5SDimitry Andric 
11780b57cec5SDimitry Andric   // Put SUs of same color into same block
11790b57cec5SDimitry Andric   Node2CurrentBlock.resize(DAGSize, -1);
11800b57cec5SDimitry Andric   for (unsigned i = 0, e = DAGSize; i != e; ++i) {
11810b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[i];
11820b57cec5SDimitry Andric     unsigned Color = CurrentColoring[SU->NodeNum];
11830b57cec5SDimitry Andric     if (RealID.find(Color) == RealID.end()) {
11840b57cec5SDimitry Andric       int ID = CurrentBlocks.size();
11858bcb0991SDimitry Andric       BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG, this, ID));
11860b57cec5SDimitry Andric       CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
11870b57cec5SDimitry Andric       RealID[Color] = ID;
11880b57cec5SDimitry Andric     }
11890b57cec5SDimitry Andric     CurrentBlocks[RealID[Color]]->addUnit(SU);
11900b57cec5SDimitry Andric     Node2CurrentBlock[SU->NodeNum] = RealID[Color];
11910b57cec5SDimitry Andric   }
11920b57cec5SDimitry Andric 
11930b57cec5SDimitry Andric   // Build dependencies between blocks.
11940b57cec5SDimitry Andric   for (unsigned i = 0, e = DAGSize; i != e; ++i) {
11950b57cec5SDimitry Andric     SUnit *SU = &DAG->SUnits[i];
11960b57cec5SDimitry Andric     int SUID = Node2CurrentBlock[i];
11970b57cec5SDimitry Andric      for (SDep& SuccDep : SU->Succs) {
11980b57cec5SDimitry Andric        SUnit *Succ = SuccDep.getSUnit();
11990b57cec5SDimitry Andric       if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
12000b57cec5SDimitry Andric         continue;
12010b57cec5SDimitry Andric       if (Node2CurrentBlock[Succ->NodeNum] != SUID)
12020b57cec5SDimitry Andric         CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
12030b57cec5SDimitry Andric                                      SuccDep.isCtrl() ? NoData : Data);
12040b57cec5SDimitry Andric     }
12050b57cec5SDimitry Andric     for (SDep& PredDep : SU->Preds) {
12060b57cec5SDimitry Andric       SUnit *Pred = PredDep.getSUnit();
12070b57cec5SDimitry Andric       if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
12080b57cec5SDimitry Andric         continue;
12090b57cec5SDimitry Andric       if (Node2CurrentBlock[Pred->NodeNum] != SUID)
12100b57cec5SDimitry Andric         CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
12110b57cec5SDimitry Andric     }
12120b57cec5SDimitry Andric   }
12130b57cec5SDimitry Andric 
12140b57cec5SDimitry Andric   // Free root and leafs of all blocks to enable scheduling inside them.
12150eae32dcSDimitry Andric   for (SIScheduleBlock *Block : CurrentBlocks)
12160b57cec5SDimitry Andric     Block->finalizeUnits();
12170eae32dcSDimitry Andric   LLVM_DEBUG({
12180eae32dcSDimitry Andric     dbgs() << "Blocks created:\n\n";
12190eae32dcSDimitry Andric     for (SIScheduleBlock *Block : CurrentBlocks)
12200b57cec5SDimitry Andric       Block->printDebug(true);
12210b57cec5SDimitry Andric   });
12220b57cec5SDimitry Andric }
12230b57cec5SDimitry Andric 
12240b57cec5SDimitry Andric // Two functions taken from Codegen/MachineScheduler.cpp
12250b57cec5SDimitry Andric 
12260b57cec5SDimitry Andric /// Non-const version.
12270b57cec5SDimitry Andric static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,MachineBasicBlock::const_iterator End)12280b57cec5SDimitry Andric nextIfDebug(MachineBasicBlock::iterator I,
12290b57cec5SDimitry Andric             MachineBasicBlock::const_iterator End) {
12300b57cec5SDimitry Andric   for (; I != End; ++I) {
12310b57cec5SDimitry Andric     if (!I->isDebugInstr())
12320b57cec5SDimitry Andric       break;
12330b57cec5SDimitry Andric   }
12340b57cec5SDimitry Andric   return I;
12350b57cec5SDimitry Andric }
12360b57cec5SDimitry Andric 
topologicalSort()12370b57cec5SDimitry Andric void SIScheduleBlockCreator::topologicalSort() {
12380b57cec5SDimitry Andric   unsigned DAGSize = CurrentBlocks.size();
12390b57cec5SDimitry Andric   std::vector<int> WorkList;
12400b57cec5SDimitry Andric 
12410b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Topological Sort\n");
12420b57cec5SDimitry Andric 
12430b57cec5SDimitry Andric   WorkList.reserve(DAGSize);
12440b57cec5SDimitry Andric   TopDownIndex2Block.resize(DAGSize);
12450b57cec5SDimitry Andric   TopDownBlock2Index.resize(DAGSize);
12460b57cec5SDimitry Andric   BottomUpIndex2Block.resize(DAGSize);
12470b57cec5SDimitry Andric 
12480b57cec5SDimitry Andric   for (unsigned i = 0, e = DAGSize; i != e; ++i) {
12490b57cec5SDimitry Andric     SIScheduleBlock *Block = CurrentBlocks[i];
12500b57cec5SDimitry Andric     unsigned Degree = Block->getSuccs().size();
12510b57cec5SDimitry Andric     TopDownBlock2Index[i] = Degree;
12520b57cec5SDimitry Andric     if (Degree == 0) {
12530b57cec5SDimitry Andric       WorkList.push_back(i);
12540b57cec5SDimitry Andric     }
12550b57cec5SDimitry Andric   }
12560b57cec5SDimitry Andric 
12570b57cec5SDimitry Andric   int Id = DAGSize;
12580b57cec5SDimitry Andric   while (!WorkList.empty()) {
12590b57cec5SDimitry Andric     int i = WorkList.back();
12600b57cec5SDimitry Andric     SIScheduleBlock *Block = CurrentBlocks[i];
12610b57cec5SDimitry Andric     WorkList.pop_back();
12620b57cec5SDimitry Andric     TopDownBlock2Index[i] = --Id;
12630b57cec5SDimitry Andric     TopDownIndex2Block[Id] = i;
12640b57cec5SDimitry Andric     for (SIScheduleBlock* Pred : Block->getPreds()) {
12650b57cec5SDimitry Andric       if (!--TopDownBlock2Index[Pred->getID()])
12660b57cec5SDimitry Andric         WorkList.push_back(Pred->getID());
12670b57cec5SDimitry Andric     }
12680b57cec5SDimitry Andric   }
12690b57cec5SDimitry Andric 
12700b57cec5SDimitry Andric #ifndef NDEBUG
12710b57cec5SDimitry Andric   // Check correctness of the ordering.
12720b57cec5SDimitry Andric   for (unsigned i = 0, e = DAGSize; i != e; ++i) {
12730b57cec5SDimitry Andric     SIScheduleBlock *Block = CurrentBlocks[i];
12740b57cec5SDimitry Andric     for (SIScheduleBlock* Pred : Block->getPreds()) {
12750b57cec5SDimitry Andric       assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
12760b57cec5SDimitry Andric       "Wrong Top Down topological sorting");
12770b57cec5SDimitry Andric     }
12780b57cec5SDimitry Andric   }
12790b57cec5SDimitry Andric #endif
12800b57cec5SDimitry Andric 
12810b57cec5SDimitry Andric   BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
12820b57cec5SDimitry Andric                                          TopDownIndex2Block.rend());
12830b57cec5SDimitry Andric }
12840b57cec5SDimitry Andric 
scheduleInsideBlocks()12850b57cec5SDimitry Andric void SIScheduleBlockCreator::scheduleInsideBlocks() {
12860b57cec5SDimitry Andric   unsigned DAGSize = CurrentBlocks.size();
12870b57cec5SDimitry Andric 
12880b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "\nScheduling Blocks\n\n");
12890b57cec5SDimitry Andric 
12900b57cec5SDimitry Andric   // We do schedule a valid scheduling such that a Block corresponds
12910b57cec5SDimitry Andric   // to a range of instructions.
12920b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n");
12930b57cec5SDimitry Andric   for (unsigned i = 0, e = DAGSize; i != e; ++i) {
12940b57cec5SDimitry Andric     SIScheduleBlock *Block = CurrentBlocks[i];
12950b57cec5SDimitry Andric     Block->fastSchedule();
12960b57cec5SDimitry Andric   }
12970b57cec5SDimitry Andric 
12980b57cec5SDimitry Andric   // Note: the following code, and the part restoring previous position
12990b57cec5SDimitry Andric   // is by far the most expensive operation of the Scheduler.
13000b57cec5SDimitry Andric 
13010b57cec5SDimitry Andric   // Do not update CurrentTop.
13020b57cec5SDimitry Andric   MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
13030b57cec5SDimitry Andric   std::vector<MachineBasicBlock::iterator> PosOld;
13040b57cec5SDimitry Andric   std::vector<MachineBasicBlock::iterator> PosNew;
13050b57cec5SDimitry Andric   PosOld.reserve(DAG->SUnits.size());
13060b57cec5SDimitry Andric   PosNew.reserve(DAG->SUnits.size());
13070b57cec5SDimitry Andric 
13080b57cec5SDimitry Andric   for (unsigned i = 0, e = DAGSize; i != e; ++i) {
13090b57cec5SDimitry Andric     int BlockIndice = TopDownIndex2Block[i];
13100b57cec5SDimitry Andric     SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
13110b57cec5SDimitry Andric     std::vector<SUnit*> SUs = Block->getScheduledUnits();
13120b57cec5SDimitry Andric 
13130b57cec5SDimitry Andric     for (SUnit* SU : SUs) {
13140b57cec5SDimitry Andric       MachineInstr *MI = SU->getInstr();
13150b57cec5SDimitry Andric       MachineBasicBlock::iterator Pos = MI;
13160b57cec5SDimitry Andric       PosOld.push_back(Pos);
13170b57cec5SDimitry Andric       if (&*CurrentTopFastSched == MI) {
13180b57cec5SDimitry Andric         PosNew.push_back(Pos);
13190b57cec5SDimitry Andric         CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
13200b57cec5SDimitry Andric                                           DAG->getCurrentBottom());
13210b57cec5SDimitry Andric       } else {
13220b57cec5SDimitry Andric         // Update the instruction stream.
13230b57cec5SDimitry Andric         DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
13240b57cec5SDimitry Andric 
13250b57cec5SDimitry Andric         // Update LiveIntervals.
13260b57cec5SDimitry Andric         // Note: Moving all instructions and calling handleMove every time
13270b57cec5SDimitry Andric         // is the most cpu intensive operation of the scheduler.
13280b57cec5SDimitry Andric         // It would gain a lot if there was a way to recompute the
13290b57cec5SDimitry Andric         // LiveIntervals for the entire scheduling region.
13300b57cec5SDimitry Andric         DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true);
13310b57cec5SDimitry Andric         PosNew.push_back(CurrentTopFastSched);
13320b57cec5SDimitry Andric       }
13330b57cec5SDimitry Andric     }
13340b57cec5SDimitry Andric   }
13350b57cec5SDimitry Andric 
13360b57cec5SDimitry Andric   // Now we have Block of SUs == Block of MI.
13370b57cec5SDimitry Andric   // We do the final schedule for the instructions inside the block.
13380b57cec5SDimitry Andric   // The property that all the SUs of the Block are grouped together as MI
13390b57cec5SDimitry Andric   // is used for correct reg usage tracking.
13400b57cec5SDimitry Andric   for (unsigned i = 0, e = DAGSize; i != e; ++i) {
13410b57cec5SDimitry Andric     SIScheduleBlock *Block = CurrentBlocks[i];
13420b57cec5SDimitry Andric     std::vector<SUnit*> SUs = Block->getScheduledUnits();
13430b57cec5SDimitry Andric     Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
13440b57cec5SDimitry Andric   }
13450b57cec5SDimitry Andric 
13460b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Restoring MI Pos\n");
13470b57cec5SDimitry Andric   // Restore old ordering (which prevents a LIS->handleMove bug).
13480b57cec5SDimitry Andric   for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
13490b57cec5SDimitry Andric     MachineBasicBlock::iterator POld = PosOld[i-1];
13500b57cec5SDimitry Andric     MachineBasicBlock::iterator PNew = PosNew[i-1];
13510b57cec5SDimitry Andric     if (PNew != POld) {
13520b57cec5SDimitry Andric       // Update the instruction stream.
13530b57cec5SDimitry Andric       DAG->getBB()->splice(POld, DAG->getBB(), PNew);
13540b57cec5SDimitry Andric 
13550b57cec5SDimitry Andric       // Update LiveIntervals.
13560b57cec5SDimitry Andric       DAG->getLIS()->handleMove(*POld, /*UpdateFlags=*/true);
13570b57cec5SDimitry Andric     }
13580b57cec5SDimitry Andric   }
13590b57cec5SDimitry Andric 
13600eae32dcSDimitry Andric   LLVM_DEBUG({
13610eae32dcSDimitry Andric     for (SIScheduleBlock *Block : CurrentBlocks)
13620b57cec5SDimitry Andric       Block->printDebug(true);
13630b57cec5SDimitry Andric   });
13640b57cec5SDimitry Andric }
13650b57cec5SDimitry Andric 
fillStats()13660b57cec5SDimitry Andric void SIScheduleBlockCreator::fillStats() {
13670b57cec5SDimitry Andric   unsigned DAGSize = CurrentBlocks.size();
13680b57cec5SDimitry Andric 
13690b57cec5SDimitry Andric   for (unsigned i = 0, e = DAGSize; i != e; ++i) {
13700b57cec5SDimitry Andric     int BlockIndice = TopDownIndex2Block[i];
13710b57cec5SDimitry Andric     SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
13720b57cec5SDimitry Andric     if (Block->getPreds().empty())
13730b57cec5SDimitry Andric       Block->Depth = 0;
13740b57cec5SDimitry Andric     else {
13750b57cec5SDimitry Andric       unsigned Depth = 0;
13760b57cec5SDimitry Andric       for (SIScheduleBlock *Pred : Block->getPreds()) {
13770b57cec5SDimitry Andric         if (Depth < Pred->Depth + Pred->getCost())
13780b57cec5SDimitry Andric           Depth = Pred->Depth + Pred->getCost();
13790b57cec5SDimitry Andric       }
13800b57cec5SDimitry Andric       Block->Depth = Depth;
13810b57cec5SDimitry Andric     }
13820b57cec5SDimitry Andric   }
13830b57cec5SDimitry Andric 
13840b57cec5SDimitry Andric   for (unsigned i = 0, e = DAGSize; i != e; ++i) {
13850b57cec5SDimitry Andric     int BlockIndice = BottomUpIndex2Block[i];
13860b57cec5SDimitry Andric     SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
13870b57cec5SDimitry Andric     if (Block->getSuccs().empty())
13880b57cec5SDimitry Andric       Block->Height = 0;
13890b57cec5SDimitry Andric     else {
13900b57cec5SDimitry Andric       unsigned Height = 0;
13910b57cec5SDimitry Andric       for (const auto &Succ : Block->getSuccs())
13920b57cec5SDimitry Andric         Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
13930b57cec5SDimitry Andric       Block->Height = Height;
13940b57cec5SDimitry Andric     }
13950b57cec5SDimitry Andric   }
13960b57cec5SDimitry Andric }
13970b57cec5SDimitry Andric 
13980b57cec5SDimitry Andric // SIScheduleBlockScheduler //
13990b57cec5SDimitry Andric 
SIScheduleBlockScheduler(SIScheduleDAGMI * DAG,SISchedulerBlockSchedulerVariant Variant,SIScheduleBlocks BlocksStruct)14000b57cec5SDimitry Andric SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
14010b57cec5SDimitry Andric                                                    SISchedulerBlockSchedulerVariant Variant,
14020b57cec5SDimitry Andric                                                    SIScheduleBlocks  BlocksStruct) :
14030b57cec5SDimitry Andric   DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
14040b57cec5SDimitry Andric   LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
14050b57cec5SDimitry Andric   SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
14060b57cec5SDimitry Andric 
14070b57cec5SDimitry Andric   // Fill the usage of every output
14080b57cec5SDimitry Andric   // Warning: while by construction we always have a link between two blocks
14090b57cec5SDimitry Andric   // when one needs a result from the other, the number of users of an output
14100b57cec5SDimitry Andric   // is not the sum of child blocks having as input the same virtual register.
14110b57cec5SDimitry Andric   // Here is an example. A produces x and y. B eats x and produces x'.
14120b57cec5SDimitry Andric   // C eats x' and y. The register coalescer may have attributed the same
14130b57cec5SDimitry Andric   // virtual register to x and x'.
14140b57cec5SDimitry Andric   // To count accurately, we do a topological sort. In case the register is
14150b57cec5SDimitry Andric   // found for several parents, we increment the usage of the one with the
14160b57cec5SDimitry Andric   // highest topological index.
14170b57cec5SDimitry Andric   LiveOutRegsNumUsages.resize(Blocks.size());
14180eae32dcSDimitry Andric   for (SIScheduleBlock *Block : Blocks) {
14190b57cec5SDimitry Andric     for (unsigned Reg : Block->getInRegs()) {
14200b57cec5SDimitry Andric       bool Found = false;
14210b57cec5SDimitry Andric       int topoInd = -1;
14220b57cec5SDimitry Andric       for (SIScheduleBlock* Pred: Block->getPreds()) {
14230b57cec5SDimitry Andric         std::set<unsigned> PredOutRegs = Pred->getOutRegs();
14240b57cec5SDimitry Andric         std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
14250b57cec5SDimitry Andric 
14260b57cec5SDimitry Andric         if (RegPos != PredOutRegs.end()) {
14270b57cec5SDimitry Andric           Found = true;
14280b57cec5SDimitry Andric           if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
14290b57cec5SDimitry Andric             topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
14300b57cec5SDimitry Andric           }
14310b57cec5SDimitry Andric         }
14320b57cec5SDimitry Andric       }
14330b57cec5SDimitry Andric 
14340b57cec5SDimitry Andric       if (!Found)
14350b57cec5SDimitry Andric         continue;
14360b57cec5SDimitry Andric 
14370b57cec5SDimitry Andric       int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
14380b57cec5SDimitry Andric       ++LiveOutRegsNumUsages[PredID][Reg];
14390b57cec5SDimitry Andric     }
14400b57cec5SDimitry Andric   }
14410b57cec5SDimitry Andric 
14420b57cec5SDimitry Andric   LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
14430b57cec5SDimitry Andric   BlockNumPredsLeft.resize(Blocks.size());
14440b57cec5SDimitry Andric   BlockNumSuccsLeft.resize(Blocks.size());
14450b57cec5SDimitry Andric 
14460b57cec5SDimitry Andric   for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
14470b57cec5SDimitry Andric     SIScheduleBlock *Block = Blocks[i];
14480b57cec5SDimitry Andric     BlockNumPredsLeft[i] = Block->getPreds().size();
14490b57cec5SDimitry Andric     BlockNumSuccsLeft[i] = Block->getSuccs().size();
14500b57cec5SDimitry Andric   }
14510b57cec5SDimitry Andric 
14520b57cec5SDimitry Andric #ifndef NDEBUG
14530b57cec5SDimitry Andric   for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
14540b57cec5SDimitry Andric     SIScheduleBlock *Block = Blocks[i];
14550b57cec5SDimitry Andric     assert(Block->getID() == i);
14560b57cec5SDimitry Andric   }
14570b57cec5SDimitry Andric #endif
14580b57cec5SDimitry Andric 
14590b57cec5SDimitry Andric   std::set<unsigned> InRegs = DAG->getInRegs();
14600b57cec5SDimitry Andric   addLiveRegs(InRegs);
14610b57cec5SDimitry Andric 
14620b57cec5SDimitry Andric   // Increase LiveOutRegsNumUsages for blocks
14630b57cec5SDimitry Andric   // producing registers consumed in another
14640b57cec5SDimitry Andric   // scheduling region.
14650b57cec5SDimitry Andric   for (unsigned Reg : DAG->getOutRegs()) {
14660b57cec5SDimitry Andric     for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
14670b57cec5SDimitry Andric       // Do reverse traversal
14680b57cec5SDimitry Andric       int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
14690b57cec5SDimitry Andric       SIScheduleBlock *Block = Blocks[ID];
14700b57cec5SDimitry Andric       const std::set<unsigned> &OutRegs = Block->getOutRegs();
14710b57cec5SDimitry Andric 
14720b57cec5SDimitry Andric       if (OutRegs.find(Reg) == OutRegs.end())
14730b57cec5SDimitry Andric         continue;
14740b57cec5SDimitry Andric 
14750b57cec5SDimitry Andric       ++LiveOutRegsNumUsages[ID][Reg];
14760b57cec5SDimitry Andric       break;
14770b57cec5SDimitry Andric     }
14780b57cec5SDimitry Andric   }
14790b57cec5SDimitry Andric 
14800b57cec5SDimitry Andric   // Fill LiveRegsConsumers for regs that were already
14810b57cec5SDimitry Andric   // defined before scheduling.
14820eae32dcSDimitry Andric   for (SIScheduleBlock *Block : Blocks) {
14830b57cec5SDimitry Andric     for (unsigned Reg : Block->getInRegs()) {
14840b57cec5SDimitry Andric       bool Found = false;
14850b57cec5SDimitry Andric       for (SIScheduleBlock* Pred: Block->getPreds()) {
14860b57cec5SDimitry Andric         std::set<unsigned> PredOutRegs = Pred->getOutRegs();
14870b57cec5SDimitry Andric         std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
14880b57cec5SDimitry Andric 
14890b57cec5SDimitry Andric         if (RegPos != PredOutRegs.end()) {
14900b57cec5SDimitry Andric           Found = true;
14910b57cec5SDimitry Andric           break;
14920b57cec5SDimitry Andric         }
14930b57cec5SDimitry Andric       }
14940b57cec5SDimitry Andric 
14950b57cec5SDimitry Andric       if (!Found)
14960b57cec5SDimitry Andric         ++LiveRegsConsumers[Reg];
14970b57cec5SDimitry Andric     }
14980b57cec5SDimitry Andric   }
14990b57cec5SDimitry Andric 
15000b57cec5SDimitry Andric   for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
15010b57cec5SDimitry Andric     SIScheduleBlock *Block = Blocks[i];
15020b57cec5SDimitry Andric     if (BlockNumPredsLeft[i] == 0) {
15030b57cec5SDimitry Andric       ReadyBlocks.push_back(Block);
15040b57cec5SDimitry Andric     }
15050b57cec5SDimitry Andric   }
15060b57cec5SDimitry Andric 
15070b57cec5SDimitry Andric   while (SIScheduleBlock *Block = pickBlock()) {
15080b57cec5SDimitry Andric     BlocksScheduled.push_back(Block);
15090b57cec5SDimitry Andric     blockScheduled(Block);
15100b57cec5SDimitry Andric   }
15110b57cec5SDimitry Andric 
15120b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Block Order:"; for (SIScheduleBlock *Block
15130b57cec5SDimitry Andric                                             : BlocksScheduled) {
15140b57cec5SDimitry Andric     dbgs() << ' ' << Block->getID();
15150b57cec5SDimitry Andric   } dbgs() << '\n';);
15160b57cec5SDimitry Andric }
15170b57cec5SDimitry Andric 
tryCandidateLatency(SIBlockSchedCandidate & Cand,SIBlockSchedCandidate & TryCand)15180b57cec5SDimitry Andric bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
15190b57cec5SDimitry Andric                                                    SIBlockSchedCandidate &TryCand) {
15200b57cec5SDimitry Andric   if (!Cand.isValid()) {
15210b57cec5SDimitry Andric     TryCand.Reason = NodeOrder;
15220b57cec5SDimitry Andric     return true;
15230b57cec5SDimitry Andric   }
15240b57cec5SDimitry Andric 
15250b57cec5SDimitry Andric   // Try to hide high latencies.
15260b57cec5SDimitry Andric   if (SISched::tryLess(TryCand.LastPosHighLatParentScheduled,
15270b57cec5SDimitry Andric                  Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
15280b57cec5SDimitry Andric     return true;
15290b57cec5SDimitry Andric   // Schedule high latencies early so you can hide them better.
15300b57cec5SDimitry Andric   if (SISched::tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
15310b57cec5SDimitry Andric                           TryCand, Cand, Latency))
15320b57cec5SDimitry Andric     return true;
15330b57cec5SDimitry Andric   if (TryCand.IsHighLatency && SISched::tryGreater(TryCand.Height, Cand.Height,
15340b57cec5SDimitry Andric                                                    TryCand, Cand, Depth))
15350b57cec5SDimitry Andric     return true;
15360b57cec5SDimitry Andric   if (SISched::tryGreater(TryCand.NumHighLatencySuccessors,
15370b57cec5SDimitry Andric                           Cand.NumHighLatencySuccessors,
15380b57cec5SDimitry Andric                           TryCand, Cand, Successor))
15390b57cec5SDimitry Andric     return true;
15400b57cec5SDimitry Andric   return false;
15410b57cec5SDimitry Andric }
15420b57cec5SDimitry Andric 
tryCandidateRegUsage(SIBlockSchedCandidate & Cand,SIBlockSchedCandidate & TryCand)15430b57cec5SDimitry Andric bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
15440b57cec5SDimitry Andric                                                     SIBlockSchedCandidate &TryCand) {
15450b57cec5SDimitry Andric   if (!Cand.isValid()) {
15460b57cec5SDimitry Andric     TryCand.Reason = NodeOrder;
15470b57cec5SDimitry Andric     return true;
15480b57cec5SDimitry Andric   }
15490b57cec5SDimitry Andric 
15500b57cec5SDimitry Andric   if (SISched::tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
15510b57cec5SDimitry Andric                        TryCand, Cand, RegUsage))
15520b57cec5SDimitry Andric     return true;
15530b57cec5SDimitry Andric   if (SISched::tryGreater(TryCand.NumSuccessors > 0,
15540b57cec5SDimitry Andric                           Cand.NumSuccessors > 0,
15550b57cec5SDimitry Andric                           TryCand, Cand, Successor))
15560b57cec5SDimitry Andric     return true;
15570b57cec5SDimitry Andric   if (SISched::tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
15580b57cec5SDimitry Andric     return true;
15590b57cec5SDimitry Andric   if (SISched::tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
15600b57cec5SDimitry Andric                        TryCand, Cand, RegUsage))
15610b57cec5SDimitry Andric     return true;
15620b57cec5SDimitry Andric   return false;
15630b57cec5SDimitry Andric }
15640b57cec5SDimitry Andric 
pickBlock()15650b57cec5SDimitry Andric SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
15660b57cec5SDimitry Andric   SIBlockSchedCandidate Cand;
15670b57cec5SDimitry Andric   std::vector<SIScheduleBlock*>::iterator Best;
15680b57cec5SDimitry Andric   SIScheduleBlock *Block;
15690b57cec5SDimitry Andric   if (ReadyBlocks.empty())
15700b57cec5SDimitry Andric     return nullptr;
15710b57cec5SDimitry Andric 
15720b57cec5SDimitry Andric   DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
15730b57cec5SDimitry Andric                         VregCurrentUsage, SregCurrentUsage);
15740b57cec5SDimitry Andric   if (VregCurrentUsage > maxVregUsage)
15750b57cec5SDimitry Andric     maxVregUsage = VregCurrentUsage;
15760b57cec5SDimitry Andric   if (SregCurrentUsage > maxSregUsage)
15770b57cec5SDimitry Andric     maxSregUsage = SregCurrentUsage;
15780b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Picking New Blocks\n"; dbgs() << "Available: ";
15790b57cec5SDimitry Andric              for (SIScheduleBlock *Block
15800b57cec5SDimitry Andric                   : ReadyBlocks) dbgs()
15810b57cec5SDimitry Andric              << Block->getID() << ' ';
15820b57cec5SDimitry Andric              dbgs() << "\nCurrent Live:\n";
15830b57cec5SDimitry Andric              for (unsigned Reg
15840b57cec5SDimitry Andric                   : LiveRegs) dbgs()
15850b57cec5SDimitry Andric              << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
15860b57cec5SDimitry Andric              dbgs() << '\n';
15870b57cec5SDimitry Andric              dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';
15880b57cec5SDimitry Andric              dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';);
15890b57cec5SDimitry Andric 
15900b57cec5SDimitry Andric   Cand.Block = nullptr;
15910b57cec5SDimitry Andric   for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
15920b57cec5SDimitry Andric        E = ReadyBlocks.end(); I != E; ++I) {
15930b57cec5SDimitry Andric     SIBlockSchedCandidate TryCand;
15940b57cec5SDimitry Andric     TryCand.Block = *I;
15950b57cec5SDimitry Andric     TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
15960b57cec5SDimitry Andric     TryCand.VGPRUsageDiff =
15970b57cec5SDimitry Andric       checkRegUsageImpact(TryCand.Block->getInRegs(),
15985ffd83dbSDimitry Andric           TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
15990b57cec5SDimitry Andric     TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
16000b57cec5SDimitry Andric     TryCand.NumHighLatencySuccessors =
16010b57cec5SDimitry Andric       TryCand.Block->getNumHighLatencySuccessors();
16020b57cec5SDimitry Andric     TryCand.LastPosHighLatParentScheduled =
16030b57cec5SDimitry Andric       (unsigned int) std::max<int> (0,
16040b57cec5SDimitry Andric          LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
16050b57cec5SDimitry Andric            LastPosWaitedHighLatency);
16060b57cec5SDimitry Andric     TryCand.Height = TryCand.Block->Height;
16070b57cec5SDimitry Andric     // Try not to increase VGPR usage too much, else we may spill.
16080b57cec5SDimitry Andric     if (VregCurrentUsage > 120 ||
16090b57cec5SDimitry Andric         Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
16100b57cec5SDimitry Andric       if (!tryCandidateRegUsage(Cand, TryCand) &&
16110b57cec5SDimitry Andric           Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
16120b57cec5SDimitry Andric         tryCandidateLatency(Cand, TryCand);
16130b57cec5SDimitry Andric     } else {
16140b57cec5SDimitry Andric       if (!tryCandidateLatency(Cand, TryCand))
16150b57cec5SDimitry Andric         tryCandidateRegUsage(Cand, TryCand);
16160b57cec5SDimitry Andric     }
16170b57cec5SDimitry Andric     if (TryCand.Reason != NoCand) {
16180b57cec5SDimitry Andric       Cand.setBest(TryCand);
16190b57cec5SDimitry Andric       Best = I;
16200b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' '
16210b57cec5SDimitry Andric                         << getReasonStr(Cand.Reason) << '\n');
16220b57cec5SDimitry Andric     }
16230b57cec5SDimitry Andric   }
16240b57cec5SDimitry Andric 
16250b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n';
16260b57cec5SDimitry Andric              dbgs() << "Is a block with high latency instruction: "
16270b57cec5SDimitry Andric                     << (Cand.IsHighLatency ? "yes\n" : "no\n");
16280b57cec5SDimitry Andric              dbgs() << "Position of last high latency dependency: "
16290b57cec5SDimitry Andric                     << Cand.LastPosHighLatParentScheduled << '\n';
16300b57cec5SDimitry Andric              dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';
16310b57cec5SDimitry Andric              dbgs() << '\n';);
16320b57cec5SDimitry Andric 
16330b57cec5SDimitry Andric   Block = Cand.Block;
16340b57cec5SDimitry Andric   ReadyBlocks.erase(Best);
16350b57cec5SDimitry Andric   return Block;
16360b57cec5SDimitry Andric }
16370b57cec5SDimitry Andric 
16380b57cec5SDimitry Andric // Tracking of currently alive registers to determine VGPR Usage.
16390b57cec5SDimitry Andric 
addLiveRegs(std::set<unsigned> & Regs)16400b57cec5SDimitry Andric void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1641e8d8bef9SDimitry Andric   for (Register Reg : Regs) {
16420b57cec5SDimitry Andric     // For now only track virtual registers.
1643e8d8bef9SDimitry Andric     if (!Reg.isVirtual())
16440b57cec5SDimitry Andric       continue;
16450b57cec5SDimitry Andric     // If not already in the live set, then add it.
16460b57cec5SDimitry Andric     (void) LiveRegs.insert(Reg);
16470b57cec5SDimitry Andric   }
16480b57cec5SDimitry Andric }
16490b57cec5SDimitry Andric 
decreaseLiveRegs(SIScheduleBlock * Block,std::set<unsigned> & Regs)16500b57cec5SDimitry Andric void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
16510b57cec5SDimitry Andric                                        std::set<unsigned> &Regs) {
16520b57cec5SDimitry Andric   for (unsigned Reg : Regs) {
16530b57cec5SDimitry Andric     // For now only track virtual registers.
16540b57cec5SDimitry Andric     std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
16550b57cec5SDimitry Andric     assert (Pos != LiveRegs.end() && // Reg must be live.
16560b57cec5SDimitry Andric                LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
16570b57cec5SDimitry Andric                LiveRegsConsumers[Reg] >= 1);
16580b57cec5SDimitry Andric     --LiveRegsConsumers[Reg];
16590b57cec5SDimitry Andric     if (LiveRegsConsumers[Reg] == 0)
16600b57cec5SDimitry Andric       LiveRegs.erase(Pos);
16610b57cec5SDimitry Andric   }
16620b57cec5SDimitry Andric }
16630b57cec5SDimitry Andric 
releaseBlockSuccs(SIScheduleBlock * Parent)16640b57cec5SDimitry Andric void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
16650b57cec5SDimitry Andric   for (const auto &Block : Parent->getSuccs()) {
16660b57cec5SDimitry Andric     if (--BlockNumPredsLeft[Block.first->getID()] == 0)
16670b57cec5SDimitry Andric       ReadyBlocks.push_back(Block.first);
16680b57cec5SDimitry Andric 
16690b57cec5SDimitry Andric     if (Parent->isHighLatencyBlock() &&
16700b57cec5SDimitry Andric         Block.second == SIScheduleBlockLinkKind::Data)
16710b57cec5SDimitry Andric       LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
16720b57cec5SDimitry Andric   }
16730b57cec5SDimitry Andric }
16740b57cec5SDimitry Andric 
blockScheduled(SIScheduleBlock * Block)16750b57cec5SDimitry Andric void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
16760b57cec5SDimitry Andric   decreaseLiveRegs(Block, Block->getInRegs());
16770b57cec5SDimitry Andric   addLiveRegs(Block->getOutRegs());
16780b57cec5SDimitry Andric   releaseBlockSuccs(Block);
16790eae32dcSDimitry Andric   for (const auto &RegP : LiveOutRegsNumUsages[Block->getID()]) {
16800b57cec5SDimitry Andric     // We produce this register, thus it must not be previously alive.
16810b57cec5SDimitry Andric     assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
16820b57cec5SDimitry Andric            LiveRegsConsumers[RegP.first] == 0);
16830b57cec5SDimitry Andric     LiveRegsConsumers[RegP.first] += RegP.second;
16840b57cec5SDimitry Andric   }
16850b57cec5SDimitry Andric   if (LastPosHighLatencyParentScheduled[Block->getID()] >
16860b57cec5SDimitry Andric         (unsigned)LastPosWaitedHighLatency)
16870b57cec5SDimitry Andric     LastPosWaitedHighLatency =
16880b57cec5SDimitry Andric       LastPosHighLatencyParentScheduled[Block->getID()];
16890b57cec5SDimitry Andric   ++NumBlockScheduled;
16900b57cec5SDimitry Andric }
16910b57cec5SDimitry Andric 
16920b57cec5SDimitry Andric std::vector<int>
checkRegUsageImpact(std::set<unsigned> & InRegs,std::set<unsigned> & OutRegs)16930b57cec5SDimitry Andric SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
16940b57cec5SDimitry Andric                                      std::set<unsigned> &OutRegs) {
16950b57cec5SDimitry Andric   std::vector<int> DiffSetPressure;
16960b57cec5SDimitry Andric   DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);
16970b57cec5SDimitry Andric 
1698e8d8bef9SDimitry Andric   for (Register Reg : InRegs) {
16990b57cec5SDimitry Andric     // For now only track virtual registers.
1700e8d8bef9SDimitry Andric     if (!Reg.isVirtual())
17010b57cec5SDimitry Andric       continue;
17020b57cec5SDimitry Andric     if (LiveRegsConsumers[Reg] > 1)
17030b57cec5SDimitry Andric       continue;
17040b57cec5SDimitry Andric     PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
17050b57cec5SDimitry Andric     for (; PSetI.isValid(); ++PSetI) {
17060b57cec5SDimitry Andric       DiffSetPressure[*PSetI] -= PSetI.getWeight();
17070b57cec5SDimitry Andric     }
17080b57cec5SDimitry Andric   }
17090b57cec5SDimitry Andric 
1710e8d8bef9SDimitry Andric   for (Register Reg : OutRegs) {
17110b57cec5SDimitry Andric     // For now only track virtual registers.
1712e8d8bef9SDimitry Andric     if (!Reg.isVirtual())
17130b57cec5SDimitry Andric       continue;
17140b57cec5SDimitry Andric     PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
17150b57cec5SDimitry Andric     for (; PSetI.isValid(); ++PSetI) {
17160b57cec5SDimitry Andric       DiffSetPressure[*PSetI] += PSetI.getWeight();
17170b57cec5SDimitry Andric     }
17180b57cec5SDimitry Andric   }
17190b57cec5SDimitry Andric 
17200b57cec5SDimitry Andric   return DiffSetPressure;
17210b57cec5SDimitry Andric }
17220b57cec5SDimitry Andric 
17230b57cec5SDimitry Andric // SIScheduler //
17240b57cec5SDimitry Andric 
17250b57cec5SDimitry Andric struct SIScheduleBlockResult
scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,SISchedulerBlockSchedulerVariant ScheduleVariant)17260b57cec5SDimitry Andric SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
17270b57cec5SDimitry Andric                              SISchedulerBlockSchedulerVariant ScheduleVariant) {
17280b57cec5SDimitry Andric   SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
17290b57cec5SDimitry Andric   SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
17300b57cec5SDimitry Andric   std::vector<SIScheduleBlock*> ScheduledBlocks;
17310b57cec5SDimitry Andric   struct SIScheduleBlockResult Res;
17320b57cec5SDimitry Andric 
17330b57cec5SDimitry Andric   ScheduledBlocks = Scheduler.getBlocks();
17340b57cec5SDimitry Andric 
17350eae32dcSDimitry Andric   for (SIScheduleBlock *Block : ScheduledBlocks) {
17360b57cec5SDimitry Andric     std::vector<SUnit*> SUs = Block->getScheduledUnits();
17370b57cec5SDimitry Andric 
17380b57cec5SDimitry Andric     for (SUnit* SU : SUs)
17390b57cec5SDimitry Andric       Res.SUs.push_back(SU->NodeNum);
17400b57cec5SDimitry Andric   }
17410b57cec5SDimitry Andric 
17420b57cec5SDimitry Andric   Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
17430b57cec5SDimitry Andric   Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
17440b57cec5SDimitry Andric   return Res;
17450b57cec5SDimitry Andric }
17460b57cec5SDimitry Andric 
17470b57cec5SDimitry Andric // SIScheduleDAGMI //
17480b57cec5SDimitry Andric 
SIScheduleDAGMI(MachineSchedContext * C)17490b57cec5SDimitry Andric SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
17508bcb0991SDimitry Andric   ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) {
17510b57cec5SDimitry Andric   SITII = static_cast<const SIInstrInfo*>(TII);
17520b57cec5SDimitry Andric   SITRI = static_cast<const SIRegisterInfo*>(TRI);
17530b57cec5SDimitry Andric }
17540b57cec5SDimitry Andric 
17550b57cec5SDimitry Andric SIScheduleDAGMI::~SIScheduleDAGMI() = default;
17560b57cec5SDimitry Andric 
17570b57cec5SDimitry Andric // Code adapted from scheduleDAG.cpp
17580b57cec5SDimitry Andric // Does a topological sort over the SUs.
17590b57cec5SDimitry Andric // Both TopDown and BottomUp
topologicalSort()17600b57cec5SDimitry Andric void SIScheduleDAGMI::topologicalSort() {
17610b57cec5SDimitry Andric   Topo.InitDAGTopologicalSorting();
17620b57cec5SDimitry Andric 
17630b57cec5SDimitry Andric   TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
17640b57cec5SDimitry Andric   BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
17650b57cec5SDimitry Andric }
17660b57cec5SDimitry Andric 
17670b57cec5SDimitry Andric // Move low latencies further from their user without
17680b57cec5SDimitry Andric // increasing SGPR usage (in general)
17690b57cec5SDimitry Andric // This is to be replaced by a better pass that would
17700b57cec5SDimitry Andric // take into account SGPR usage (based on VGPR Usage
17710b57cec5SDimitry Andric // and the corresponding wavefront count), that would
17720b57cec5SDimitry Andric // try to merge groups of loads if it make sense, etc
moveLowLatencies()17730b57cec5SDimitry Andric void SIScheduleDAGMI::moveLowLatencies() {
17740b57cec5SDimitry Andric    unsigned DAGSize = SUnits.size();
17750b57cec5SDimitry Andric    int LastLowLatencyUser = -1;
17760b57cec5SDimitry Andric    int LastLowLatencyPos = -1;
17770b57cec5SDimitry Andric 
17780b57cec5SDimitry Andric    for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
17790b57cec5SDimitry Andric     SUnit *SU = &SUnits[ScheduledSUnits[i]];
17800b57cec5SDimitry Andric     bool IsLowLatencyUser = false;
17810b57cec5SDimitry Andric     unsigned MinPos = 0;
17820b57cec5SDimitry Andric 
17830b57cec5SDimitry Andric     for (SDep& PredDep : SU->Preds) {
17840b57cec5SDimitry Andric       SUnit *Pred = PredDep.getSUnit();
17850b57cec5SDimitry Andric       if (SITII->isLowLatencyInstruction(*Pred->getInstr())) {
17860b57cec5SDimitry Andric         IsLowLatencyUser = true;
17870b57cec5SDimitry Andric       }
17880b57cec5SDimitry Andric       if (Pred->NodeNum >= DAGSize)
17890b57cec5SDimitry Andric         continue;
17900b57cec5SDimitry Andric       unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
17910b57cec5SDimitry Andric       if (PredPos >= MinPos)
17920b57cec5SDimitry Andric         MinPos = PredPos + 1;
17930b57cec5SDimitry Andric     }
17940b57cec5SDimitry Andric 
17950b57cec5SDimitry Andric     if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
17960b57cec5SDimitry Andric       unsigned BestPos = LastLowLatencyUser + 1;
17970b57cec5SDimitry Andric       if ((int)BestPos <= LastLowLatencyPos)
17980b57cec5SDimitry Andric         BestPos = LastLowLatencyPos + 1;
17990b57cec5SDimitry Andric       if (BestPos < MinPos)
18000b57cec5SDimitry Andric         BestPos = MinPos;
18010b57cec5SDimitry Andric       if (BestPos < i) {
18020b57cec5SDimitry Andric         for (unsigned u = i; u > BestPos; --u) {
18030b57cec5SDimitry Andric           ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
18040b57cec5SDimitry Andric           ScheduledSUnits[u] = ScheduledSUnits[u-1];
18050b57cec5SDimitry Andric         }
18060b57cec5SDimitry Andric         ScheduledSUnits[BestPos] = SU->NodeNum;
18070b57cec5SDimitry Andric         ScheduledSUnitsInv[SU->NodeNum] = BestPos;
18080b57cec5SDimitry Andric       }
18090b57cec5SDimitry Andric       LastLowLatencyPos = BestPos;
18100b57cec5SDimitry Andric       if (IsLowLatencyUser)
18110b57cec5SDimitry Andric         LastLowLatencyUser = BestPos;
18120b57cec5SDimitry Andric     } else if (IsLowLatencyUser) {
18130b57cec5SDimitry Andric       LastLowLatencyUser = i;
18140b57cec5SDimitry Andric     // Moves COPY instructions on which depends
18150b57cec5SDimitry Andric     // the low latency instructions too.
18160b57cec5SDimitry Andric     } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
18170b57cec5SDimitry Andric       bool CopyForLowLat = false;
18180b57cec5SDimitry Andric       for (SDep& SuccDep : SU->Succs) {
18190b57cec5SDimitry Andric         SUnit *Succ = SuccDep.getSUnit();
18200b57cec5SDimitry Andric         if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
18210b57cec5SDimitry Andric           continue;
18220b57cec5SDimitry Andric         if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
18230b57cec5SDimitry Andric           CopyForLowLat = true;
18240b57cec5SDimitry Andric         }
18250b57cec5SDimitry Andric       }
18260b57cec5SDimitry Andric       if (!CopyForLowLat)
18270b57cec5SDimitry Andric         continue;
18280b57cec5SDimitry Andric       if (MinPos < i) {
18290b57cec5SDimitry Andric         for (unsigned u = i; u > MinPos; --u) {
18300b57cec5SDimitry Andric           ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
18310b57cec5SDimitry Andric           ScheduledSUnits[u] = ScheduledSUnits[u-1];
18320b57cec5SDimitry Andric         }
18330b57cec5SDimitry Andric         ScheduledSUnits[MinPos] = SU->NodeNum;
18340b57cec5SDimitry Andric         ScheduledSUnitsInv[SU->NodeNum] = MinPos;
18350b57cec5SDimitry Andric       }
18360b57cec5SDimitry Andric     }
18370b57cec5SDimitry Andric   }
18380b57cec5SDimitry Andric }
18390b57cec5SDimitry Andric 
restoreSULinksLeft()18400b57cec5SDimitry Andric void SIScheduleDAGMI::restoreSULinksLeft() {
18410b57cec5SDimitry Andric   for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
18420b57cec5SDimitry Andric     SUnits[i].isScheduled = false;
18430b57cec5SDimitry Andric     SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
18440b57cec5SDimitry Andric     SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
18450b57cec5SDimitry Andric     SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
18460b57cec5SDimitry Andric     SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
18470b57cec5SDimitry Andric   }
18480b57cec5SDimitry Andric }
18490b57cec5SDimitry Andric 
18500b57cec5SDimitry Andric // Return the Vgpr and Sgpr usage corresponding to some virtual registers.
18510b57cec5SDimitry Andric template<typename _Iterator> void
fillVgprSgprCost(_Iterator First,_Iterator End,unsigned & VgprUsage,unsigned & SgprUsage)18520b57cec5SDimitry Andric SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
18530b57cec5SDimitry Andric                                   unsigned &VgprUsage, unsigned &SgprUsage) {
18540b57cec5SDimitry Andric   VgprUsage = 0;
18550b57cec5SDimitry Andric   SgprUsage = 0;
18560b57cec5SDimitry Andric   for (_Iterator RegI = First; RegI != End; ++RegI) {
1857e8d8bef9SDimitry Andric     Register Reg = *RegI;
18580b57cec5SDimitry Andric     // For now only track virtual registers
1859e8d8bef9SDimitry Andric     if (!Reg.isVirtual())
18600b57cec5SDimitry Andric       continue;
18610b57cec5SDimitry Andric     PSetIterator PSetI = MRI.getPressureSets(Reg);
18620b57cec5SDimitry Andric     for (; PSetI.isValid(); ++PSetI) {
18635ffd83dbSDimitry Andric       if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
18640b57cec5SDimitry Andric         VgprUsage += PSetI.getWeight();
18655ffd83dbSDimitry Andric       else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
18660b57cec5SDimitry Andric         SgprUsage += PSetI.getWeight();
18670b57cec5SDimitry Andric     }
18680b57cec5SDimitry Andric   }
18690b57cec5SDimitry Andric }
18700b57cec5SDimitry Andric 
schedule()18710b57cec5SDimitry Andric void SIScheduleDAGMI::schedule()
18720b57cec5SDimitry Andric {
18730b57cec5SDimitry Andric   SmallVector<SUnit*, 8> TopRoots, BotRoots;
18740b57cec5SDimitry Andric   SIScheduleBlockResult Best, Temp;
18750b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Preparing Scheduling\n");
18760b57cec5SDimitry Andric 
18770b57cec5SDimitry Andric   buildDAGWithRegPressure();
187806c3fb27SDimitry Andric   postProcessDAG();
1879753f127fSDimitry Andric 
18800b57cec5SDimitry Andric   LLVM_DEBUG(dump());
1881753f127fSDimitry Andric   if (PrintDAGs)
1882753f127fSDimitry Andric     dump();
1883753f127fSDimitry Andric   if (ViewMISchedDAGs)
1884753f127fSDimitry Andric     viewGraph();
18850b57cec5SDimitry Andric 
18860b57cec5SDimitry Andric   topologicalSort();
18870b57cec5SDimitry Andric   findRootsAndBiasEdges(TopRoots, BotRoots);
18880b57cec5SDimitry Andric   // We reuse several ScheduleDAGMI and ScheduleDAGMILive
18890b57cec5SDimitry Andric   // functions, but to make them happy we must initialize
18900b57cec5SDimitry Andric   // the default Scheduler implementation (even if we do not
18910b57cec5SDimitry Andric   // run it)
18920b57cec5SDimitry Andric   SchedImpl->initialize(this);
18930b57cec5SDimitry Andric   initQueues(TopRoots, BotRoots);
18940b57cec5SDimitry Andric 
18950b57cec5SDimitry Andric   // Fill some stats to help scheduling.
18960b57cec5SDimitry Andric 
18970b57cec5SDimitry Andric   SUnitsLinksBackup = SUnits;
18980b57cec5SDimitry Andric   IsLowLatencySU.clear();
18990b57cec5SDimitry Andric   LowLatencyOffset.clear();
19000b57cec5SDimitry Andric   IsHighLatencySU.clear();
19010b57cec5SDimitry Andric 
19020b57cec5SDimitry Andric   IsLowLatencySU.resize(SUnits.size(), 0);
19030b57cec5SDimitry Andric   LowLatencyOffset.resize(SUnits.size(), 0);
19040b57cec5SDimitry Andric   IsHighLatencySU.resize(SUnits.size(), 0);
19050b57cec5SDimitry Andric 
19060b57cec5SDimitry Andric   for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
19070b57cec5SDimitry Andric     SUnit *SU = &SUnits[i];
19080b57cec5SDimitry Andric     const MachineOperand *BaseLatOp;
19090b57cec5SDimitry Andric     int64_t OffLatReg;
19100b57cec5SDimitry Andric     if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
19110b57cec5SDimitry Andric       IsLowLatencySU[i] = 1;
19125ffd83dbSDimitry Andric       bool OffsetIsScalable;
19130b57cec5SDimitry Andric       if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
19145ffd83dbSDimitry Andric                                          OffsetIsScalable, TRI))
19150b57cec5SDimitry Andric         LowLatencyOffset[i] = OffLatReg;
19165ffd83dbSDimitry Andric     } else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode()))
19170b57cec5SDimitry Andric       IsHighLatencySU[i] = 1;
19180b57cec5SDimitry Andric   }
19190b57cec5SDimitry Andric 
19200b57cec5SDimitry Andric   SIScheduler Scheduler(this);
19210b57cec5SDimitry Andric   Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
19220b57cec5SDimitry Andric                                    SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
19230b57cec5SDimitry Andric 
19240b57cec5SDimitry Andric   // if VGPR usage is extremely high, try other good performing variants
19250b57cec5SDimitry Andric   // which could lead to lower VGPR usage
19260b57cec5SDimitry Andric   if (Best.MaxVGPRUsage > 180) {
19270b57cec5SDimitry Andric     static const std::pair<SISchedulerBlockCreatorVariant,
19280b57cec5SDimitry Andric                            SISchedulerBlockSchedulerVariant>
19290b57cec5SDimitry Andric         Variants[] = {
19300b57cec5SDimitry Andric       { LatenciesAlone, BlockRegUsageLatency },
19310b57cec5SDimitry Andric //      { LatenciesAlone, BlockRegUsage },
19320b57cec5SDimitry Andric       { LatenciesGrouped, BlockLatencyRegUsage },
19330b57cec5SDimitry Andric //      { LatenciesGrouped, BlockRegUsageLatency },
19340b57cec5SDimitry Andric //      { LatenciesGrouped, BlockRegUsage },
19350b57cec5SDimitry Andric       { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
19360b57cec5SDimitry Andric //      { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
19370b57cec5SDimitry Andric //      { LatenciesAlonePlusConsecutive, BlockRegUsage }
19380b57cec5SDimitry Andric     };
19390b57cec5SDimitry Andric     for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
19400b57cec5SDimitry Andric       Temp = Scheduler.scheduleVariant(v.first, v.second);
19410b57cec5SDimitry Andric       if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
19420b57cec5SDimitry Andric         Best = Temp;
19430b57cec5SDimitry Andric     }
19440b57cec5SDimitry Andric   }
19450b57cec5SDimitry Andric   // if VGPR usage is still extremely high, we may spill. Try other variants
19460b57cec5SDimitry Andric   // which are less performing, but that could lead to lower VGPR usage.
19470b57cec5SDimitry Andric   if (Best.MaxVGPRUsage > 200) {
19480b57cec5SDimitry Andric     static const std::pair<SISchedulerBlockCreatorVariant,
19490b57cec5SDimitry Andric                            SISchedulerBlockSchedulerVariant>
19500b57cec5SDimitry Andric         Variants[] = {
19510b57cec5SDimitry Andric //      { LatenciesAlone, BlockRegUsageLatency },
19520b57cec5SDimitry Andric       { LatenciesAlone, BlockRegUsage },
19530b57cec5SDimitry Andric //      { LatenciesGrouped, BlockLatencyRegUsage },
19540b57cec5SDimitry Andric       { LatenciesGrouped, BlockRegUsageLatency },
19550b57cec5SDimitry Andric       { LatenciesGrouped, BlockRegUsage },
19560b57cec5SDimitry Andric //      { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
19570b57cec5SDimitry Andric       { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
19580b57cec5SDimitry Andric       { LatenciesAlonePlusConsecutive, BlockRegUsage }
19590b57cec5SDimitry Andric     };
19600b57cec5SDimitry Andric     for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
19610b57cec5SDimitry Andric       Temp = Scheduler.scheduleVariant(v.first, v.second);
19620b57cec5SDimitry Andric       if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
19630b57cec5SDimitry Andric         Best = Temp;
19640b57cec5SDimitry Andric     }
19650b57cec5SDimitry Andric   }
19660b57cec5SDimitry Andric 
19670b57cec5SDimitry Andric   ScheduledSUnits = Best.SUs;
19680b57cec5SDimitry Andric   ScheduledSUnitsInv.resize(SUnits.size());
19690b57cec5SDimitry Andric 
19700b57cec5SDimitry Andric   for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
19710b57cec5SDimitry Andric     ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
19720b57cec5SDimitry Andric   }
19730b57cec5SDimitry Andric 
19740b57cec5SDimitry Andric   moveLowLatencies();
19750b57cec5SDimitry Andric 
19760b57cec5SDimitry Andric   // Tell the outside world about the result of the scheduling.
19770b57cec5SDimitry Andric 
19780b57cec5SDimitry Andric   assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
19790b57cec5SDimitry Andric   TopRPTracker.setPos(CurrentTop);
19800b57cec5SDimitry Andric 
19810eae32dcSDimitry Andric   for (unsigned I : ScheduledSUnits) {
19820eae32dcSDimitry Andric     SUnit *SU = &SUnits[I];
19830b57cec5SDimitry Andric 
19840b57cec5SDimitry Andric     scheduleMI(SU, true);
19850b57cec5SDimitry Andric 
19860b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
19870b57cec5SDimitry Andric                       << *SU->getInstr());
19880b57cec5SDimitry Andric   }
19890b57cec5SDimitry Andric 
19900b57cec5SDimitry Andric   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
19910b57cec5SDimitry Andric 
19920b57cec5SDimitry Andric   placeDebugValues();
19930b57cec5SDimitry Andric 
19940b57cec5SDimitry Andric   LLVM_DEBUG({
19950b57cec5SDimitry Andric     dbgs() << "*** Final schedule for "
19960b57cec5SDimitry Andric            << printMBBReference(*begin()->getParent()) << " ***\n";
19970b57cec5SDimitry Andric     dumpSchedule();
19980b57cec5SDimitry Andric     dbgs() << '\n';
19990b57cec5SDimitry Andric   });
20000b57cec5SDimitry Andric }
2001