1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the ResourcePriorityQueue class, which is a 10 // SchedulingPriorityQueue that prioritizes instructions using DFA state to 11 // reduce the length of the critical path through the basic block 12 // on VLIW platforms. 13 // The scheduler is basically a top-down adaptable list scheduler with DFA 14 // resource tracking added to the cost function. 15 // DFA is queried as a state machine to model "packets/bundles" during 16 // schedule. Currently packets/bundles are discarded at the end of 17 // scheduling, affecting only order of instructions. 18 // 19 //===----------------------------------------------------------------------===// 20 21 #include "llvm/CodeGen/ResourcePriorityQueue.h" 22 #include "llvm/CodeGen/DFAPacketizer.h" 23 #include "llvm/CodeGen/SelectionDAGISel.h" 24 #include "llvm/CodeGen/SelectionDAGNodes.h" 25 #include "llvm/CodeGen/TargetInstrInfo.h" 26 #include "llvm/CodeGen/TargetLowering.h" 27 #include "llvm/CodeGen/TargetRegisterInfo.h" 28 #include "llvm/CodeGen/TargetSubtargetInfo.h" 29 #include "llvm/Support/CommandLine.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "scheduler" 34 35 static cl::opt<bool> 36 DisableDFASched("disable-dfa-sched", cl::Hidden, 37 cl::desc("Disable use of DFA during scheduling")); 38 39 static cl::opt<int> RegPressureThreshold( 40 "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::init(5), 41 cl::desc("Track reg pressure and switch priority to in-depth")); 42 43 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) 44 : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) { 45 const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); 46 TRI = STI.getRegisterInfo(); 47 TLI = IS->TLI; 48 TII = STI.getInstrInfo(); 49 ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); 50 // This hard requirement could be relaxed, but for now 51 // do not let it proceed. 52 assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); 53 54 unsigned NumRC = TRI->getNumRegClasses(); 55 RegLimit.resize(NumRC); 56 RegPressure.resize(NumRC); 57 std::fill(RegLimit.begin(), RegLimit.end(), 0); 58 std::fill(RegPressure.begin(), RegPressure.end(), 0); 59 for (const TargetRegisterClass *RC : TRI->regclasses()) 60 RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF); 61 62 ParallelLiveRanges = 0; 63 HorizontalVerticalBalance = 0; 64 } 65 66 unsigned 67 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { 68 unsigned NumberDeps = 0; 69 for (SDep &Pred : SU->Preds) { 70 if (Pred.isCtrl()) 71 continue; 72 73 SUnit *PredSU = Pred.getSUnit(); 74 const SDNode *ScegN = PredSU->getNode(); 75 76 if (!ScegN) 77 continue; 78 79 // If value is passed to CopyToReg, it is probably 80 // live outside BB. 81 switch (ScegN->getOpcode()) { 82 default: break; 83 case ISD::TokenFactor: break; 84 case ISD::CopyFromReg: NumberDeps++; break; 85 case ISD::CopyToReg: break; 86 case ISD::INLINEASM: break; 87 case ISD::INLINEASM_BR: break; 88 } 89 if (!ScegN->isMachineOpcode()) 90 continue; 91 92 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { 93 MVT VT = ScegN->getSimpleValueType(i); 94 if (TLI->isTypeLegal(VT) 95 && (TLI->getRegClassFor(VT)->getID() == RCId)) { 96 NumberDeps++; 97 break; 98 } 99 } 100 } 101 return NumberDeps; 102 } 103 104 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, 105 unsigned RCId) { 106 unsigned NumberDeps = 0; 107 for (const SDep &Succ : SU->Succs) { 108 if (Succ.isCtrl()) 109 continue; 110 111 SUnit *SuccSU = Succ.getSUnit(); 112 const SDNode *ScegN = SuccSU->getNode(); 113 if (!ScegN) 114 continue; 115 116 // If value is passed to CopyToReg, it is probably 117 // live outside BB. 118 switch (ScegN->getOpcode()) { 119 default: break; 120 case ISD::TokenFactor: break; 121 case ISD::CopyFromReg: break; 122 case ISD::CopyToReg: NumberDeps++; break; 123 case ISD::INLINEASM: break; 124 case ISD::INLINEASM_BR: break; 125 } 126 if (!ScegN->isMachineOpcode()) 127 continue; 128 129 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { 130 const SDValue &Op = ScegN->getOperand(i); 131 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 132 if (TLI->isTypeLegal(VT) 133 && (TLI->getRegClassFor(VT)->getID() == RCId)) { 134 NumberDeps++; 135 break; 136 } 137 } 138 } 139 return NumberDeps; 140 } 141 142 static unsigned numberCtrlDepsInSU(SUnit *SU) { 143 unsigned NumberDeps = 0; 144 for (const SDep &Succ : SU->Succs) 145 if (Succ.isCtrl()) 146 NumberDeps++; 147 148 return NumberDeps; 149 } 150 151 static unsigned numberCtrlPredInSU(SUnit *SU) { 152 unsigned NumberDeps = 0; 153 for (SDep &Pred : SU->Preds) 154 if (Pred.isCtrl()) 155 NumberDeps++; 156 157 return NumberDeps; 158 } 159 160 /// 161 /// Initialize nodes. 162 /// 163 void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) { 164 SUnits = &sunits; 165 NumNodesSolelyBlocking.resize(SUnits->size(), 0); 166 167 for (SUnit &SU : *SUnits) { 168 initNumRegDefsLeft(&SU); 169 SU.NodeQueueId = 0; 170 } 171 } 172 173 /// This heuristic is used if DFA scheduling is not desired 174 /// for some VLIW platform. 175 bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { 176 // The isScheduleHigh flag allows nodes with wraparound dependencies that 177 // cannot easily be modeled as edges with latencies to be scheduled as 178 // soon as possible in a top-down schedule. 179 if (LHS->isScheduleHigh && !RHS->isScheduleHigh) 180 return false; 181 182 if (!LHS->isScheduleHigh && RHS->isScheduleHigh) 183 return true; 184 185 unsigned LHSNum = LHS->NodeNum; 186 unsigned RHSNum = RHS->NodeNum; 187 188 // The most important heuristic is scheduling the critical path. 189 unsigned LHSLatency = PQ->getLatency(LHSNum); 190 unsigned RHSLatency = PQ->getLatency(RHSNum); 191 if (LHSLatency < RHSLatency) return true; 192 if (LHSLatency > RHSLatency) return false; 193 194 // After that, if two nodes have identical latencies, look to see if one will 195 // unblock more other nodes than the other. 196 unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); 197 unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); 198 if (LHSBlocked < RHSBlocked) return true; 199 if (LHSBlocked > RHSBlocked) return false; 200 201 // Finally, just to provide a stable ordering, use the node number as a 202 // deciding factor. 203 return LHSNum < RHSNum; 204 } 205 206 207 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor 208 /// of SU, return it, otherwise return null. 209 SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { 210 SUnit *OnlyAvailablePred = nullptr; 211 for (const SDep &Pred : SU->Preds) { 212 SUnit &PredSU = *Pred.getSUnit(); 213 if (!PredSU.isScheduled) { 214 // We found an available, but not scheduled, predecessor. If it's the 215 // only one we have found, keep track of it... otherwise give up. 216 if (OnlyAvailablePred && OnlyAvailablePred != &PredSU) 217 return nullptr; 218 OnlyAvailablePred = &PredSU; 219 } 220 } 221 return OnlyAvailablePred; 222 } 223 224 void ResourcePriorityQueue::push(SUnit *SU) { 225 // Look at all of the successors of this node. Count the number of nodes that 226 // this node is the sole unscheduled node for. 227 unsigned NumNodesBlocking = 0; 228 for (const SDep &Succ : SU->Succs) 229 if (getSingleUnscheduledPred(Succ.getSUnit()) == SU) 230 ++NumNodesBlocking; 231 232 NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; 233 Queue.push_back(SU); 234 } 235 236 /// Check if scheduling of this SU is possible 237 /// in the current packet. 238 bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { 239 if (!SU || !SU->getNode()) 240 return false; 241 242 // If this is a compound instruction, 243 // it is likely to be a call. Do not delay it. 244 if (SU->getNode()->getGluedNode()) 245 return true; 246 247 // First see if the pipeline could receive this instruction 248 // in the current cycle. 249 if (SU->getNode()->isMachineOpcode()) 250 switch (SU->getNode()->getMachineOpcode()) { 251 default: 252 if (!ResourcesModel->canReserveResources(&TII->get( 253 SU->getNode()->getMachineOpcode()))) 254 return false; 255 break; 256 case TargetOpcode::EXTRACT_SUBREG: 257 case TargetOpcode::INSERT_SUBREG: 258 case TargetOpcode::SUBREG_TO_REG: 259 case TargetOpcode::REG_SEQUENCE: 260 case TargetOpcode::IMPLICIT_DEF: 261 break; 262 } 263 264 // Now see if there are no other dependencies 265 // to instructions already in the packet. 266 for (const SUnit *S : Packet) 267 for (const SDep &Succ : S->Succs) { 268 // Since we do not add pseudos to packets, might as well 269 // ignore order deps. 270 if (Succ.isCtrl()) 271 continue; 272 273 if (Succ.getSUnit() == SU) 274 return false; 275 } 276 277 return true; 278 } 279 280 /// Keep track of available resources. 281 void ResourcePriorityQueue::reserveResources(SUnit *SU) { 282 // If this SU does not fit in the packet 283 // start a new one. 284 if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) { 285 ResourcesModel->clearResources(); 286 Packet.clear(); 287 } 288 289 if (SU->getNode() && SU->getNode()->isMachineOpcode()) { 290 switch (SU->getNode()->getMachineOpcode()) { 291 default: 292 ResourcesModel->reserveResources(&TII->get( 293 SU->getNode()->getMachineOpcode())); 294 break; 295 case TargetOpcode::EXTRACT_SUBREG: 296 case TargetOpcode::INSERT_SUBREG: 297 case TargetOpcode::SUBREG_TO_REG: 298 case TargetOpcode::REG_SEQUENCE: 299 case TargetOpcode::IMPLICIT_DEF: 300 break; 301 } 302 Packet.push_back(SU); 303 } 304 // Forcefully end packet for PseudoOps. 305 else { 306 ResourcesModel->clearResources(); 307 Packet.clear(); 308 } 309 310 // If packet is now full, reset the state so in the next cycle 311 // we start fresh. 312 if (Packet.size() >= InstrItins->SchedModel.IssueWidth) { 313 ResourcesModel->clearResources(); 314 Packet.clear(); 315 } 316 } 317 318 int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { 319 int RegBalance = 0; 320 321 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) 322 return RegBalance; 323 324 // Gen estimate. 325 for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) { 326 MVT VT = SU->getNode()->getSimpleValueType(i); 327 if (TLI->isTypeLegal(VT) 328 && TLI->getRegClassFor(VT) 329 && TLI->getRegClassFor(VT)->getID() == RCId) 330 RegBalance += numberRCValSuccInSU(SU, RCId); 331 } 332 // Kill estimate. 333 for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) { 334 const SDValue &Op = SU->getNode()->getOperand(i); 335 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 336 if (isa<ConstantSDNode>(Op.getNode())) 337 continue; 338 339 if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT) 340 && TLI->getRegClassFor(VT)->getID() == RCId) 341 RegBalance -= numberRCValPredInSU(SU, RCId); 342 } 343 return RegBalance; 344 } 345 346 /// Estimates change in reg pressure from this SU. 347 /// It is achieved by trivial tracking of defined 348 /// and used vregs in dependent instructions. 349 /// The RawPressure flag makes this function to ignore 350 /// existing reg file sizes, and report raw def/use 351 /// balance. 352 int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { 353 int RegBalance = 0; 354 355 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) 356 return RegBalance; 357 358 if (RawPressure) { 359 for (const TargetRegisterClass *RC : TRI->regclasses()) 360 RegBalance += rawRegPressureDelta(SU, RC->getID()); 361 } 362 else { 363 for (const TargetRegisterClass *RC : TRI->regclasses()) { 364 if ((RegPressure[RC->getID()] + 365 rawRegPressureDelta(SU, RC->getID()) > 0) && 366 (RegPressure[RC->getID()] + 367 rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()])) 368 RegBalance += rawRegPressureDelta(SU, RC->getID()); 369 } 370 } 371 372 return RegBalance; 373 } 374 375 // Constants used to denote relative importance of 376 // heuristic components for cost computation. 377 static const unsigned PriorityOne = 200; 378 static const unsigned PriorityTwo = 50; 379 static const unsigned PriorityThree = 15; 380 static const unsigned PriorityFour = 5; 381 static const unsigned ScaleOne = 20; 382 static const unsigned ScaleTwo = 10; 383 static const unsigned ScaleThree = 5; 384 static const unsigned FactorOne = 2; 385 386 /// Returns single number reflecting benefit of scheduling SU 387 /// in the current cycle. 388 int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { 389 // Initial trivial priority. 390 int ResCount = 1; 391 392 // Do not waste time on a node that is already scheduled. 393 if (SU->isScheduled) 394 return ResCount; 395 396 // Forced priority is high. 397 if (SU->isScheduleHigh) 398 ResCount += PriorityOne; 399 400 // Adaptable scheduling 401 // A small, but very parallel 402 // region, where reg pressure is an issue. 403 if (HorizontalVerticalBalance > RegPressureThreshold) { 404 // Critical path first 405 ResCount += (SU->getHeight() * ScaleTwo); 406 // If resources are available for it, multiply the 407 // chance of scheduling. 408 if (isResourceAvailable(SU)) 409 ResCount <<= FactorOne; 410 411 // Consider change to reg pressure from scheduling 412 // this SU. 413 ResCount -= (regPressureDelta(SU,true) * ScaleOne); 414 } 415 // Default heuristic, greeady and 416 // critical path driven. 417 else { 418 // Critical path first. 419 ResCount += (SU->getHeight() * ScaleTwo); 420 // Now see how many instructions is blocked by this SU. 421 ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo); 422 // If resources are available for it, multiply the 423 // chance of scheduling. 424 if (isResourceAvailable(SU)) 425 ResCount <<= FactorOne; 426 427 ResCount -= (regPressureDelta(SU) * ScaleTwo); 428 } 429 430 // These are platform-specific things. 431 // Will need to go into the back end 432 // and accessed from here via a hook. 433 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { 434 if (N->isMachineOpcode()) { 435 const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); 436 if (TID.isCall()) 437 ResCount += (PriorityTwo + (ScaleThree*N->getNumValues())); 438 } 439 else 440 switch (N->getOpcode()) { 441 default: break; 442 case ISD::TokenFactor: 443 case ISD::CopyFromReg: 444 case ISD::CopyToReg: 445 ResCount += PriorityFour; 446 break; 447 448 case ISD::INLINEASM: 449 case ISD::INLINEASM_BR: 450 ResCount += PriorityThree; 451 break; 452 } 453 } 454 return ResCount; 455 } 456 457 458 /// Main resource tracking point. 459 void ResourcePriorityQueue::scheduledNode(SUnit *SU) { 460 // Use NULL entry as an event marker to reset 461 // the DFA state. 462 if (!SU) { 463 ResourcesModel->clearResources(); 464 Packet.clear(); 465 return; 466 } 467 468 const SDNode *ScegN = SU->getNode(); 469 // Update reg pressure tracking. 470 // First update current node. 471 if (ScegN->isMachineOpcode()) { 472 // Estimate generated regs. 473 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { 474 MVT VT = ScegN->getSimpleValueType(i); 475 476 if (TLI->isTypeLegal(VT)) { 477 const TargetRegisterClass *RC = TLI->getRegClassFor(VT); 478 if (RC) 479 RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID()); 480 } 481 } 482 // Estimate killed regs. 483 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { 484 const SDValue &Op = ScegN->getOperand(i); 485 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 486 487 if (TLI->isTypeLegal(VT)) { 488 const TargetRegisterClass *RC = TLI->getRegClassFor(VT); 489 if (RC) { 490 if (RegPressure[RC->getID()] > 491 (numberRCValPredInSU(SU, RC->getID()))) 492 RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID()); 493 else RegPressure[RC->getID()] = 0; 494 } 495 } 496 } 497 for (SDep &Pred : SU->Preds) { 498 if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0)) 499 continue; 500 --Pred.getSUnit()->NumRegDefsLeft; 501 } 502 } 503 504 // Reserve resources for this SU. 505 reserveResources(SU); 506 507 // Adjust number of parallel live ranges. 508 // Heuristic is simple - node with no data successors reduces 509 // number of live ranges. All others, increase it. 510 unsigned NumberNonControlDeps = 0; 511 512 for (const SDep &Succ : SU->Succs) { 513 adjustPriorityOfUnscheduledPreds(Succ.getSUnit()); 514 if (!Succ.isCtrl()) 515 NumberNonControlDeps++; 516 } 517 518 if (!NumberNonControlDeps) { 519 if (ParallelLiveRanges >= SU->NumPreds) 520 ParallelLiveRanges -= SU->NumPreds; 521 else 522 ParallelLiveRanges = 0; 523 524 } 525 else 526 ParallelLiveRanges += SU->NumRegDefsLeft; 527 528 // Track parallel live chains. 529 HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU)); 530 HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU)); 531 } 532 533 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) { 534 unsigned NodeNumDefs = 0; 535 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) 536 if (N->isMachineOpcode()) { 537 const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); 538 // No register need be allocated for this. 539 if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { 540 NodeNumDefs = 0; 541 break; 542 } 543 NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs()); 544 } 545 else 546 switch(N->getOpcode()) { 547 default: break; 548 case ISD::CopyFromReg: 549 NodeNumDefs++; 550 break; 551 case ISD::INLINEASM: 552 case ISD::INLINEASM_BR: 553 NodeNumDefs++; 554 break; 555 } 556 557 SU->NumRegDefsLeft = NodeNumDefs; 558 } 559 560 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just 561 /// scheduled. If SU is not itself available, then there is at least one 562 /// predecessor node that has not been scheduled yet. If SU has exactly ONE 563 /// unscheduled predecessor, we want to increase its priority: it getting 564 /// scheduled will make this node available, so it is better than some other 565 /// node of the same priority that will not make a node available. 566 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { 567 if (SU->isAvailable) return; // All preds scheduled. 568 569 SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); 570 if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) 571 return; 572 573 // Okay, we found a single predecessor that is available, but not scheduled. 574 // Since it is available, it must be in the priority queue. First remove it. 575 remove(OnlyAvailablePred); 576 577 // Reinsert the node into the priority queue, which recomputes its 578 // NumNodesSolelyBlocking value. 579 push(OnlyAvailablePred); 580 } 581 582 583 /// Main access point - returns next instructions 584 /// to be placed in scheduling sequence. 585 SUnit *ResourcePriorityQueue::pop() { 586 if (empty()) 587 return nullptr; 588 589 std::vector<SUnit *>::iterator Best = Queue.begin(); 590 if (!DisableDFASched) { 591 int BestCost = SUSchedulingCost(*Best); 592 for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) { 593 594 if (SUSchedulingCost(*I) > BestCost) { 595 BestCost = SUSchedulingCost(*I); 596 Best = I; 597 } 598 } 599 } 600 // Use default TD scheduling mechanism. 601 else { 602 for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) 603 if (Picker(*Best, *I)) 604 Best = I; 605 } 606 607 SUnit *V = *Best; 608 if (Best != std::prev(Queue.end())) 609 std::swap(*Best, Queue.back()); 610 611 Queue.pop_back(); 612 613 return V; 614 } 615 616 617 void ResourcePriorityQueue::remove(SUnit *SU) { 618 assert(!Queue.empty() && "Queue is empty!"); 619 std::vector<SUnit *>::iterator I = find(Queue, SU); 620 if (I != std::prev(Queue.end())) 621 std::swap(*I, Queue.back()); 622 623 Queue.pop_back(); 624 } 625