1 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// SI Machine Scheduler interface 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 16 17 #include "llvm/CodeGen/MachineScheduler.h" 18 #include "llvm/CodeGen/RegisterPressure.h" 19 #include "llvm/CodeGen/ScheduleDAG.h" 20 #include <cstdint> 21 #include <set> 22 #include <vector> 23 24 namespace llvm { 25 26 class SIInstrInfo; 27 class SIRegisterInfo; 28 29 enum SIScheduleCandReason { 30 NoCand, 31 RegUsage, 32 Latency, 33 Successor, 34 Depth, 35 NodeOrder 36 }; 37 38 struct SISchedulerCandidate { 39 // The reason for this candidate. 40 SIScheduleCandReason Reason = NoCand; 41 42 // Set of reasons that apply to multiple candidates. 43 uint32_t RepeatReasonSet = 0; 44 45 SISchedulerCandidate() = default; 46 47 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); } 48 void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); } 49 }; 50 51 class SIScheduleDAGMI; 52 class SIScheduleBlockCreator; 53 54 enum SIScheduleBlockLinkKind { 55 NoData, 56 Data 57 }; 58 59 class SIScheduleBlock { 60 SIScheduleDAGMI *DAG; 61 SIScheduleBlockCreator *BC; 62 63 std::vector<SUnit*> SUnits; 64 std::map<unsigned, unsigned> NodeNum2Index; 65 std::vector<SUnit*> TopReadySUs; 66 std::vector<SUnit*> ScheduledSUnits; 67 68 /// The top of the unscheduled zone. 69 IntervalPressure TopPressure; 70 RegPressureTracker TopRPTracker; 71 72 // Pressure: number of said class of registers needed to 73 // store the live virtual and real registers. 74 // We do care only of SGPR32 and VGPR32 and do track only virtual registers. 75 // Pressure of additional registers required inside the block. 76 std::vector<unsigned> InternalAdditionnalPressure; 77 // Pressure of input and output registers 78 std::vector<unsigned> LiveInPressure; 79 std::vector<unsigned> LiveOutPressure; 80 // Registers required by the block, and outputs. 81 // We do track only virtual registers. 82 // Note that some registers are not 32 bits, 83 // and thus the pressure is not equal 84 // to the number of live registers. 85 std::set<unsigned> LiveInRegs; 86 std::set<unsigned> LiveOutRegs; 87 88 bool Scheduled = false; 89 bool HighLatencyBlock = false; 90 91 std::vector<unsigned> HasLowLatencyNonWaitedParent; 92 93 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table. 94 unsigned ID; 95 96 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors. 97 // All blocks successors, and the kind of link 98 std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs; 99 unsigned NumHighLatencySuccessors = 0; 100 101 public: 102 SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC, 103 unsigned ID): 104 DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {} 105 106 ~SIScheduleBlock() = default; 107 108 unsigned getID() const { return ID; } 109 110 /// Functions for Block construction. 111 void addUnit(SUnit *SU); 112 113 // When all SUs have been added. 114 void finalizeUnits(); 115 116 // Add block pred, which has instruction predecessor of SU. 117 void addPred(SIScheduleBlock *Pred); 118 void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind); 119 120 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; } 121 ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> 122 getSuccs() const { return Succs; } 123 124 unsigned Height; // Maximum topdown path length to block without outputs 125 unsigned Depth; // Maximum bottomup path length to block without inputs 126 127 unsigned getNumHighLatencySuccessors() const { 128 return NumHighLatencySuccessors; 129 } 130 131 bool isHighLatencyBlock() { return HighLatencyBlock; } 132 133 // This is approximative. 134 // Ideally should take into accounts some instructions (rcp, etc) 135 // are 4 times slower. 136 int getCost() { return SUnits.size(); } 137 138 // The block Predecessors and Successors must be all registered 139 // before fastSchedule(). 140 // Fast schedule with no particular requirement. 141 void fastSchedule(); 142 143 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; } 144 145 // Complete schedule that will try to minimize reg pressure and 146 // low latencies, and will fill liveins and liveouts. 147 // Needs all MIs to be grouped between BeginBlock and EndBlock. 148 // The MIs can be moved after the scheduling, 149 // it is just used to allow correct track of live registers. 150 void schedule(MachineBasicBlock::iterator BeginBlock, 151 MachineBasicBlock::iterator EndBlock); 152 153 bool isScheduled() { return Scheduled; } 154 155 // Needs the block to be scheduled inside 156 // TODO: find a way to compute it. 157 std::vector<unsigned> &getInternalAdditionnalRegUsage() { 158 return InternalAdditionnalPressure; 159 } 160 161 std::set<unsigned> &getInRegs() { return LiveInRegs; } 162 std::set<unsigned> &getOutRegs() { return LiveOutRegs; } 163 164 void printDebug(bool Full); 165 166 private: 167 struct SISchedCandidate : SISchedulerCandidate { 168 // The best SUnit candidate. 169 SUnit *SU = nullptr; 170 171 unsigned SGPRUsage; 172 unsigned VGPRUsage; 173 bool IsLowLatency; 174 unsigned LowLatencyOffset; 175 bool HasLowLatencyNonWaitedParent; 176 177 SISchedCandidate() = default; 178 179 bool isValid() const { return SU; } 180 181 // Copy the status of another candidate without changing policy. 182 void setBest(SISchedCandidate &Best) { 183 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 184 SU = Best.SU; 185 Reason = Best.Reason; 186 SGPRUsage = Best.SGPRUsage; 187 VGPRUsage = Best.VGPRUsage; 188 IsLowLatency = Best.IsLowLatency; 189 LowLatencyOffset = Best.LowLatencyOffset; 190 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent; 191 } 192 }; 193 194 void undoSchedule(); 195 196 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge); 197 void releaseSucc(SUnit *SU, SDep *SuccEdge); 198 // InOrOutBlock: restrict to links pointing inside the block (true), 199 // or restrict to links pointing outside the block (false). 200 void releaseSuccessors(SUnit *SU, bool InOrOutBlock); 201 202 void nodeScheduled(SUnit *SU); 203 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand); 204 void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand); 205 SUnit* pickNode(); 206 void traceCandidate(const SISchedCandidate &Cand); 207 void initRegPressure(MachineBasicBlock::iterator BeginBlock, 208 MachineBasicBlock::iterator EndBlock); 209 }; 210 211 struct SIScheduleBlocks { 212 std::vector<SIScheduleBlock*> Blocks; 213 std::vector<int> TopDownIndex2Block; 214 std::vector<int> TopDownBlock2Index; 215 }; 216 217 enum SISchedulerBlockCreatorVariant { 218 LatenciesAlone, 219 LatenciesGrouped, 220 LatenciesAlonePlusConsecutive 221 }; 222 223 class SIScheduleBlockCreator { 224 SIScheduleDAGMI *DAG; 225 // unique_ptr handles freeing memory for us. 226 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs; 227 std::map<SISchedulerBlockCreatorVariant, 228 SIScheduleBlocks> Blocks; 229 std::vector<SIScheduleBlock*> CurrentBlocks; 230 std::vector<int> Node2CurrentBlock; 231 232 // Topological sort 233 // Maps topological index to the node number. 234 std::vector<int> TopDownIndex2Block; 235 std::vector<int> TopDownBlock2Index; 236 std::vector<int> BottomUpIndex2Block; 237 238 // 0 -> Color not given. 239 // 1 to SUnits.size() -> Reserved group (you should only add elements to them). 240 // Above -> Other groups. 241 int NextReservedID; 242 int NextNonReservedID; 243 std::vector<int> CurrentColoring; 244 std::vector<int> CurrentTopDownReservedDependencyColoring; 245 std::vector<int> CurrentBottomUpReservedDependencyColoring; 246 247 public: 248 SIScheduleBlockCreator(SIScheduleDAGMI *DAG); 249 250 SIScheduleBlocks 251 getBlocks(SISchedulerBlockCreatorVariant BlockVariant); 252 253 bool isSUInBlock(SUnit *SU, unsigned ID); 254 255 private: 256 // Give a Reserved color to every high latency. 257 void colorHighLatenciesAlone(); 258 259 // Create groups of high latencies with a Reserved color. 260 void colorHighLatenciesGroups(); 261 262 // Compute coloring for topdown and bottom traversals with 263 // different colors depending on dependencies on Reserved colors. 264 void colorComputeReservedDependencies(); 265 266 // Give color to all non-colored SUs according to Reserved groups dependencies. 267 void colorAccordingToReservedDependencies(); 268 269 // Divides Blocks having no bottom up or top down dependencies on Reserved groups. 270 // The new colors are computed according to the dependencies on the other blocks 271 // formed with colorAccordingToReservedDependencies. 272 void colorEndsAccordingToDependencies(); 273 274 // Cut groups into groups with SUs in consecutive order (except for Reserved groups). 275 void colorForceConsecutiveOrderInGroup(); 276 277 // Merge Constant loads that have all their users into another group to the group. 278 // (TODO: else if all their users depend on the same group, put them there) 279 void colorMergeConstantLoadsNextGroup(); 280 281 // Merge SUs that have all their users into another group to the group 282 void colorMergeIfPossibleNextGroup(); 283 284 // Merge SUs that have all their users into another group to the group, 285 // but only for Reserved groups. 286 void colorMergeIfPossibleNextGroupOnlyForReserved(); 287 288 // Merge SUs that have all their users into another group to the group, 289 // but only if the group is no more than a few SUs. 290 void colorMergeIfPossibleSmallGroupsToNextGroup(); 291 292 // Divides Blocks with important size. 293 // Idea of implementation: attribute new colors depending on topdown and 294 // bottom up links to other blocks. 295 void cutHugeBlocks(); 296 297 // Put in one group all instructions with no users in this scheduling region 298 // (we'd want these groups be at the end). 299 void regroupNoUserInstructions(); 300 301 // Give Reserved color to export instructions 302 void colorExports(); 303 304 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant); 305 306 void topologicalSort(); 307 308 void scheduleInsideBlocks(); 309 310 void fillStats(); 311 }; 312 313 enum SISchedulerBlockSchedulerVariant { 314 BlockLatencyRegUsage, 315 BlockRegUsageLatency, 316 BlockRegUsage 317 }; 318 319 class SIScheduleBlockScheduler { 320 SIScheduleDAGMI *DAG; 321 SISchedulerBlockSchedulerVariant Variant; 322 std::vector<SIScheduleBlock*> Blocks; 323 324 std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages; 325 std::set<unsigned> LiveRegs; 326 // Num of schedulable unscheduled blocks reading the register. 327 std::map<unsigned, unsigned> LiveRegsConsumers; 328 329 std::vector<unsigned> LastPosHighLatencyParentScheduled; 330 int LastPosWaitedHighLatency; 331 332 std::vector<SIScheduleBlock*> BlocksScheduled; 333 unsigned NumBlockScheduled; 334 std::vector<SIScheduleBlock*> ReadyBlocks; 335 336 unsigned VregCurrentUsage; 337 unsigned SregCurrentUsage; 338 339 // Currently is only approximation. 340 unsigned maxVregUsage; 341 unsigned maxSregUsage; 342 343 std::vector<unsigned> BlockNumPredsLeft; 344 std::vector<unsigned> BlockNumSuccsLeft; 345 346 public: 347 SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, 348 SISchedulerBlockSchedulerVariant Variant, 349 SIScheduleBlocks BlocksStruct); 350 ~SIScheduleBlockScheduler() = default; 351 352 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; } 353 354 unsigned getVGPRUsage() { return maxVregUsage; } 355 unsigned getSGPRUsage() { return maxSregUsage; } 356 357 private: 358 struct SIBlockSchedCandidate : SISchedulerCandidate { 359 // The best Block candidate. 360 SIScheduleBlock *Block = nullptr; 361 362 bool IsHighLatency; 363 int VGPRUsageDiff; 364 unsigned NumSuccessors; 365 unsigned NumHighLatencySuccessors; 366 unsigned LastPosHighLatParentScheduled; 367 unsigned Height; 368 369 SIBlockSchedCandidate() = default; 370 371 bool isValid() const { return Block; } 372 373 // Copy the status of another candidate without changing policy. 374 void setBest(SIBlockSchedCandidate &Best) { 375 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 376 Block = Best.Block; 377 Reason = Best.Reason; 378 IsHighLatency = Best.IsHighLatency; 379 VGPRUsageDiff = Best.VGPRUsageDiff; 380 NumSuccessors = Best.NumSuccessors; 381 NumHighLatencySuccessors = Best.NumHighLatencySuccessors; 382 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled; 383 Height = Best.Height; 384 } 385 }; 386 387 bool tryCandidateLatency(SIBlockSchedCandidate &Cand, 388 SIBlockSchedCandidate &TryCand); 389 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand, 390 SIBlockSchedCandidate &TryCand); 391 SIScheduleBlock *pickBlock(); 392 393 void addLiveRegs(std::set<unsigned> &Regs); 394 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs); 395 void releaseBlockSuccs(SIScheduleBlock *Parent); 396 void blockScheduled(SIScheduleBlock *Block); 397 398 // Check register pressure change 399 // by scheduling a block with these LiveIn and LiveOut. 400 std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs, 401 std::set<unsigned> &OutRegs); 402 403 void schedule(); 404 }; 405 406 struct SIScheduleBlockResult { 407 std::vector<unsigned> SUs; 408 unsigned MaxSGPRUsage; 409 unsigned MaxVGPRUsage; 410 }; 411 412 class SIScheduler { 413 SIScheduleDAGMI *DAG; 414 SIScheduleBlockCreator BlockCreator; 415 416 public: 417 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {} 418 419 ~SIScheduler() = default; 420 421 struct SIScheduleBlockResult 422 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, 423 SISchedulerBlockSchedulerVariant ScheduleVariant); 424 }; 425 426 class SIScheduleDAGMI final : public ScheduleDAGMILive { 427 const SIInstrInfo *SITII; 428 const SIRegisterInfo *SITRI; 429 430 std::vector<SUnit> SUnitsLinksBackup; 431 432 // For moveLowLatencies. After all Scheduling variants are tested. 433 std::vector<unsigned> ScheduledSUnits; 434 std::vector<unsigned> ScheduledSUnitsInv; 435 436 public: 437 SIScheduleDAGMI(MachineSchedContext *C); 438 439 ~SIScheduleDAGMI() override; 440 441 // Entry point for the schedule. 442 void schedule() override; 443 444 // To init Block's RPTracker. 445 void initRPTracker(RegPressureTracker &RPTracker) { 446 RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false); 447 } 448 449 MachineBasicBlock *getBB() { return BB; } 450 MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; } 451 MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; } 452 LiveIntervals *getLIS() { return LIS; } 453 MachineRegisterInfo *getMRI() { return &MRI; } 454 const TargetRegisterInfo *getTRI() { return TRI; } 455 ScheduleDAGTopologicalSort *GetTopo() { return &Topo; } 456 SUnit &getEntrySU() { return EntrySU; } 457 SUnit& getExitSU() { return ExitSU; } 458 459 void restoreSULinksLeft(); 460 461 template<typename _Iterator> void fillVgprSgprCost(_Iterator First, 462 _Iterator End, 463 unsigned &VgprUsage, 464 unsigned &SgprUsage); 465 466 std::set<unsigned> getInRegs() { 467 std::set<unsigned> InRegs; 468 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) { 469 InRegs.insert(RegMaskPair.RegUnit); 470 } 471 return InRegs; 472 } 473 474 std::set<unsigned> getOutRegs() { 475 std::set<unsigned> OutRegs; 476 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) { 477 OutRegs.insert(RegMaskPair.RegUnit); 478 } 479 return OutRegs; 480 }; 481 482 private: 483 void topologicalSort(); 484 // After scheduling is done, improve low latency placements. 485 void moveLowLatencies(); 486 487 public: 488 // Some stats for scheduling inside blocks. 489 std::vector<unsigned> IsLowLatencySU; 490 std::vector<unsigned> LowLatencyOffset; 491 std::vector<unsigned> IsHighLatencySU; 492 // Topological sort 493 // Maps topological index to the node number. 494 std::vector<int> TopDownIndex2SU; 495 std::vector<int> BottomUpIndex2SU; 496 }; 497 498 } // end namespace llvm 499 500 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 501