Lines Matching +full:max +full:- +full:reason
1 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
18 /// Generally, the reason for having multiple scheduling stages is to account
19 /// for the kernel-wide effect of register usage on occupancy. Usually, only a
24 //===----------------------------------------------------------------------===//
31 #define DEBUG_TYPE "machine-scheduler"
36 "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
42 "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
48 "amdgpu-schedule-metric-bias", cl::Hidden,
55 RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
57 "bound (amdgpu-membound-threshold), or "
58 "Wave Limited (amdgpu-limit-wave-threshold)."),
70 MF = &DAG->MF; in initialize()
72 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); in initialize()
75 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass); in initialize()
77 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass); in initialize()
79 SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>(); in initialize()
103 VGPRBudget = std::max(VGPRBudget, Granule); in initialize()
108 SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit); in initialize()
109 VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit); in initialize()
110 SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit); in initialize()
111 VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit); in initialize()
124 /// - Explicit physical register operands
125 /// - Subregister definitions
141 for (const auto &Op : SU.getInstr()->operands()) { in canUsePressureDiffs()
156 // the tracker, so we need to pass those function a non-const copy. in getRegisterPressures()
159 TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); in getRegisterPressures()
161 TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); in getRegisterPressures()
173 if (!DAG->isTrackingPressure()) in initCandidate()
198 for (const auto &Diff : DAG->getPressureDiff(SU)) { in initCandidate()
201 // PressureDiffs is always bottom-up so if we're working top-down we need in initCandidate()
204 (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc()); in initCandidate()
242 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold in initCandidate()
254 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit); in initCandidate()
260 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit); in initCandidate()
268 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit; in initCandidate()
269 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit; in initCandidate()
296 if (DAG->isTrackingPressure()) { in pickNodeFromQueue()
309 if (TryCand.Reason != NoCand) { in pickNodeFromQueue()
332 // Set the bottom-up policy based on the state of the current bottom zone and in pickNodeBidirectional()
336 // Set the top-down policy based on the state of the current top zone and in pickNodeBidirectional()
343 if (!BotCand.isValid() || BotCand.SU->isScheduled || in pickNodeBidirectional()
346 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand, in pickNodeBidirectional()
348 assert(BotCand.Reason != NoCand && "failed to find the first candidate"); in pickNodeBidirectional()
355 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand, in pickNodeBidirectional()
358 "Last pick result should correspond to re-picking right now"); in pickNodeBidirectional()
365 if (!TopCand.isValid() || TopCand.SU->isScheduled || in pickNodeBidirectional()
368 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand, in pickNodeBidirectional()
370 assert(TopCand.Reason != NoCand && "failed to find the first candidate"); in pickNodeBidirectional()
377 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand, in pickNodeBidirectional()
380 "Last pick result should correspond to re-picking right now"); in pickNodeBidirectional()
389 TopCand.Reason = NoCand; in pickNodeBidirectional()
391 if (TopCand.Reason != NoCand) { in pickNodeBidirectional()
403 if (DAG->top() == DAG->bottom()) { in pickNode()
415 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand, in pickNode()
417 assert(TopCand.Reason != NoCand && "failed to find a candidate"); in pickNode()
426 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand, in pickNode()
428 assert(BotCand.Reason != NoCand && "failed to find a candidate"); in pickNode()
435 } while (SU->isScheduled); in pickNode()
437 if (SU->isTopReady()) in pickNode()
439 if (SU->isBottomReady()) in pickNode()
442 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " in pickNode()
443 << *SU->getInstr()); in pickNode()
491 TryCand.Reason = NodeOrder; in tryCandidate()
496 if (DAG->isTrackingPressure() && in tryCandidate()
498 RegExcess, TRI, DAG->MF)) in tryCandidate()
499 return TryCand.Reason != NoCand; in tryCandidate()
504 return TryCand.Reason != NoCand; in tryCandidate()
509 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), in tryCandidate()
510 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) in tryCandidate()
511 return TryCand.Reason != NoCand; in tryCandidate()
517 return TryCand.Reason != NoCand; in tryCandidate()
521 return TryCand.Reason != NoCand; in tryCandidate()
525 return TryCand.Reason != NoCand; in tryCandidate()
530 return TryCand.Reason != NoCand; in tryCandidate()
536 // This is a best effort to set things up for a post-RA pass. Optimizations in tryCandidate()
540 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); in tryCandidate()
542 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); in tryCandidate()
545 return TryCand.Reason != NoCand; in tryCandidate()
547 // Avoid increasing the max critical pressure in the scheduled region. in tryCandidate()
548 if (DAG->isTrackingPressure() && in tryCandidate()
550 TryCand, Cand, RegCritical, TRI, DAG->MF)) in tryCandidate()
551 return TryCand.Reason != NoCand; in tryCandidate()
553 // Avoid increasing the max pressure of the entire region. in tryCandidate()
554 if (DAG->isTrackingPressure() && in tryCandidate()
556 Cand, RegMax, TRI, DAG->MF)) in tryCandidate()
557 return TryCand.Reason != NoCand; in tryCandidate()
561 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || in tryCandidate()
562 (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { in tryCandidate()
563 TryCand.Reason = NodeOrder; in tryCandidate()
620 // If the block has the only successor then live-ins of that successor are in computeBlockPressure()
621 // live-outs of the current block. We can reuse calculated live set if the in computeBlockPressure()
626 // a live-out register. Workaround that by sticking to one-to-one relationship in computeBlockPressure()
629 if (MBB->succ_size() == 1) { in computeBlockPressure()
630 auto *Candidate = *MBB->succ_begin(); in computeBlockPressure()
631 if (!Candidate->empty() && Candidate->pred_size() == 1) { in computeBlockPressure()
632 SlotIndexes *Ind = LIS->getSlotIndexes(); in computeBlockPressure()
633 if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate)) in computeBlockPressure()
641 if (Regions[CurRegion].first->getParent() != MBB) in computeBlockPressure()
643 --CurRegion; in computeBlockPressure()
645 auto I = MBB->begin(); in computeBlockPressure()
650 auto LiveIn = std::move(LiveInIt->second); in computeBlockPressure()
651 RPTracker.reset(*MBB->begin(), &LiveIn); in computeBlockPressure()
672 if (CurRegion-- == RegionIdx) in computeBlockPressure()
680 if (I != MBB->end()) { in computeBlockPressure()
682 RPTracker.advance(MBB->end()); in computeBlockPressure()
695 auto *BB = I->first->getParent(); in getBBLiveInMap()
697 auto *MI = &*skipDebugInstructionsForward(I->first, I->second); in getBBLiveInMap()
701 } while (I != E && I->first->getParent() == BB); in getBBLiveInMap()
735 if (!Stage->initGCNSchedStage()) in runSchedStages()
742 if (!Stage->initGCNRegion()) { in runSchedStages()
743 Stage->advanceRegion(); in runSchedStages()
749 Stage->finalizeGCNRegion(); in runSchedStages()
752 Stage->finalizeGCNSchedStage(); in runSchedStages()
760 OS << "Max Occupancy Initial Schedule"; in operator <<()
769 OS << "Pre-RA Rematerialize"; in operator <<()
772 OS << "Max ILP Initial Schedule"; in operator <<()
894 if (DAG.RegionBegin->getParent() != CurrentMBB) in initGCNRegion()
906 << " " << CurrentMBB->getName() in initGCNRegion()
908 if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd; in initGCNRegion()
931 dbgs() << "Pressure before scheduling:\nRegion live-ins:" in initGCNRegion()
933 << "Region live-in pressure: " in initGCNRegion()
988 CurrentMBB = DAG.RegionBegin->getParent(); in setupNewBlock()
1004 // reason that the original schedule is better. in finalizeGCNRegion()
1045 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore); in checkScheduling()
1063 // The maximum number of arch VGPR on non-unified register file, or the in checkScheduling()
1066 // The maximum number of arch VGPR for both unified and non-unified register in checkScheduling()
1098 MachineInstr *DefMI = D.getSUnit()->getInstr(); in computeSUnitReadyCycle()
1100 unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum]; in computeSUnitReadyCycle()
1101 ReadyCycle = std::max(ReadyCycle, DefReady + Latency); in computeSUnitReadyCycle()
1120 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber(); in printScheduleModel()
1128 dbgs() << "****************************** BUBBLE OF " << I.second - IPrev in printScheduleModel()
1142 const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel(); in getScheduleMetrics()
1149 SumBubbles += ReadyCycle - CurrCycle; in getScheduleMetrics()
1175 const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel(); in getScheduleMetrics()
1185 SumBubbles += ReadyCycle - CurrCycle; in getScheduleMetrics()
1187 ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle)); in getScheduleMetrics()
1313 if (MI->isDebugInstr()) { in revertScheduling()
1318 if (MI->getIterator() != DAG.RegionEnd) { in revertScheduling()
1319 DAG.BB->remove(MI); in revertScheduling()
1320 DAG.BB->insert(DAG.RegionEnd, MI); in revertScheduling()
1321 if (!MI->isDebugInstr()) in revertScheduling()
1322 DAG.LIS->handleMove(*MI, true); in revertScheduling()
1325 // Reset read-undef flags and update them later. in revertScheduling()
1326 for (auto &Op : MI->all_defs()) in revertScheduling()
1330 if (!MI->isDebugInstr()) { in revertScheduling()
1332 // Adjust liveness and add missing dead+read-undef flags. in revertScheduling()
1333 SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot(); in revertScheduling()
1336 // Adjust for missing dead-def flags. in revertScheduling()
1340 DAG.RegionEnd = MI->getIterator(); in revertScheduling()
1348 while (SkippedDebugInstr-- > 0) in revertScheduling()
1354 DAG.RegionBegin = Unsched.front()->getIterator(); in revertScheduling()
1355 if (DAG.RegionBegin->isDebugInstr()) { in revertScheduling()
1357 if (MI->isDebugInstr()) in revertScheduling()
1359 DAG.RegionBegin = MI->getIterator(); in revertScheduling()
1375 if (!DAG.LIS->hasInterval(Reg)) in collectRematerializableInstructions()
1379 if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) || in collectRematerializableInstructions()
1384 MachineInstr *Def = Op->getParent(); in collectRematerializableInstructions()
1385 if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def)) in collectRematerializableInstructions()
1389 if (Def->getParent() == UseI->getParent()) in collectRematerializableInstructions()
1393 // live-through or used inside regions at MinOccupancy. This means that the in collectRematerializableInstructions()
1394 // register must be in the live-in set for the region. in collectRematerializableInstructions()
1398 if (It != DAG.LiveIns[I].end() && !It->second.none()) { in collectRematerializableInstructions()
1404 // Collect regions with rematerializable reg as live-in to avoid in collectRematerializableInstructions()
1429 // Collect only regions that has a rematerializable def as a live-in. in sinkTriviallyRematInsts()
1434 // Make copies of register pressure and live-ins cache that will be updated in sinkTriviallyRematInsts()
1472 Register DefReg = Def->getOperand(0).getReg(); in sinkTriviallyRematInsts()
1476 int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs; in sinkTriviallyRematInsts()
1489 Register Reg = Def->getOperand(0).getReg(); in sinkTriviallyRematInsts()
1494 TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg, in sinkTriviallyRematInsts()
1495 Def->getOperand(0).getSubReg(), *Def, *DAG.TRI); in sinkTriviallyRematInsts()
1497 LIS->InsertMachineInstrInMaps(*NewMI); in sinkTriviallyRematInsts()
1498 LIS->removeInterval(Reg); in sinkTriviallyRematInsts()
1499 LIS->createAndComputeVirtRegInterval(Reg); in sinkTriviallyRematInsts()
1512 // Update RP for all regions that has this reg as a live-in and remove in sinkTriviallyRematInsts()
1513 // the reg from all regions as a live-in. in sinkTriviallyRematInsts()
1516 if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) { in sinkTriviallyRematInsts()
1517 // Def is live-through and not used in this block. in sinkTriviallyRematInsts()
1554 Register Reg = MI->getOperand(0).getReg(); in sinkTriviallyRematInsts()
1555 LIS->RemoveMachineInstrFromMaps(*MI); in sinkTriviallyRematInsts()
1556 MI->eraseFromParent(); in sinkTriviallyRematInsts()
1557 OldMI->clearRegisterDeads(Reg); in sinkTriviallyRematInsts()
1558 LIS->removeInterval(Reg); in sinkTriviallyRematInsts()
1559 LIS->createAndComputeVirtRegInterval(Reg); in sinkTriviallyRematInsts()
1573 Register Reg = MI->getOperand(0).getReg(); in sinkTriviallyRematInsts()
1574 LIS->RemoveMachineInstrFromMaps(*OldMI); in sinkTriviallyRematInsts()
1575 OldMI->eraseFromParent(); in sinkTriviallyRematInsts()
1576 LIS->removeInterval(Reg); in sinkTriviallyRematInsts()
1577 LIS->createAndComputeVirtRegInterval(Reg); in sinkTriviallyRematInsts()
1580 // Update live-ins, register pressure, and regions caches. in sinkTriviallyRematInsts()
1584 DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent()); in sinkTriviallyRematInsts()
1597 if (!DAG.TII->isTriviallyReMaterializable(MI)) in isTriviallyReMaterializable()
1617 while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent()) in updateRegionBoundaries()
1621 if (MI->getParent() != RegionBoundaries[I].first->getParent()) in updateRegionBoundaries()
1629 std::pair(MI->getParent()->end(), MI->getParent()->end()); in updateRegionBoundaries()
1651 DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) { in hasIGLPInstrs()
1652 unsigned Opc = MI->getOpcode(); in hasIGLPInstrs()