1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the ResourcePriorityQueue class, which is a 10 // SchedulingPriorityQueue that prioritizes instructions using DFA state to 11 // reduce the length of the critical path through the basic block 12 // on VLIW platforms. 13 // The scheduler is basically a top-down adaptable list scheduler with DFA 14 // resource tracking added to the cost function. 15 // DFA is queried as a state machine to model "packets/bundles" during 16 // schedule. Currently packets/bundles are discarded at the end of 17 // scheduling, affecting only order of instructions. 18 // 19 //===----------------------------------------------------------------------===// 20 21 #include "llvm/CodeGen/ResourcePriorityQueue.h" 22 #include "llvm/CodeGen/DFAPacketizer.h" 23 #include "llvm/CodeGen/MachineInstr.h" 24 #include "llvm/CodeGen/SelectionDAGISel.h" 25 #include "llvm/CodeGen/SelectionDAGNodes.h" 26 #include "llvm/CodeGen/TargetInstrInfo.h" 27 #include "llvm/CodeGen/TargetLowering.h" 28 #include "llvm/CodeGen/TargetRegisterInfo.h" 29 #include "llvm/CodeGen/TargetSubtargetInfo.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Support/raw_ostream.h" 33 #include "llvm/Target/TargetMachine.h" 34 35 using namespace llvm; 36 37 #define DEBUG_TYPE "scheduler" 38 39 static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, 40 cl::ZeroOrMore, cl::init(false), 41 cl::desc("Disable use of DFA during scheduling")); 42 43 static cl::opt<int> RegPressureThreshold( 44 "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), 45 cl::desc("Track reg pressure and switch priority to in-depth")); 46 47 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) 48 : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) { 49 const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); 50 TRI = STI.getRegisterInfo(); 51 TLI = IS->TLI; 52 TII = STI.getInstrInfo(); 53 ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); 54 // This hard requirement could be relaxed, but for now 55 // do not let it proceed. 56 assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); 57 58 unsigned NumRC = TRI->getNumRegClasses(); 59 RegLimit.resize(NumRC); 60 RegPressure.resize(NumRC); 61 std::fill(RegLimit.begin(), RegLimit.end(), 0); 62 std::fill(RegPressure.begin(), RegPressure.end(), 0); 63 for (const TargetRegisterClass *RC : TRI->regclasses()) 64 RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF); 65 66 ParallelLiveRanges = 0; 67 HorizontalVerticalBalance = 0; 68 } 69 70 unsigned 71 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { 72 unsigned NumberDeps = 0; 73 for (SDep &Pred : SU->Preds) { 74 if (Pred.isCtrl()) 75 continue; 76 77 SUnit *PredSU = Pred.getSUnit(); 78 const SDNode *ScegN = PredSU->getNode(); 79 80 if (!ScegN) 81 continue; 82 83 // If value is passed to CopyToReg, it is probably 84 // live outside BB. 85 switch (ScegN->getOpcode()) { 86 default: break; 87 case ISD::TokenFactor: break; 88 case ISD::CopyFromReg: NumberDeps++; break; 89 case ISD::CopyToReg: break; 90 case ISD::INLINEASM: break; 91 case ISD::INLINEASM_BR: break; 92 } 93 if (!ScegN->isMachineOpcode()) 94 continue; 95 96 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { 97 MVT VT = ScegN->getSimpleValueType(i); 98 if (TLI->isTypeLegal(VT) 99 && (TLI->getRegClassFor(VT)->getID() == RCId)) { 100 NumberDeps++; 101 break; 102 } 103 } 104 } 105 return NumberDeps; 106 } 107 108 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, 109 unsigned RCId) { 110 unsigned NumberDeps = 0; 111 for (const SDep &Succ : SU->Succs) { 112 if (Succ.isCtrl()) 113 continue; 114 115 SUnit *SuccSU = Succ.getSUnit(); 116 const SDNode *ScegN = SuccSU->getNode(); 117 if (!ScegN) 118 continue; 119 120 // If value is passed to CopyToReg, it is probably 121 // live outside BB. 122 switch (ScegN->getOpcode()) { 123 default: break; 124 case ISD::TokenFactor: break; 125 case ISD::CopyFromReg: break; 126 case ISD::CopyToReg: NumberDeps++; break; 127 case ISD::INLINEASM: break; 128 case ISD::INLINEASM_BR: break; 129 } 130 if (!ScegN->isMachineOpcode()) 131 continue; 132 133 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { 134 const SDValue &Op = ScegN->getOperand(i); 135 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 136 if (TLI->isTypeLegal(VT) 137 && (TLI->getRegClassFor(VT)->getID() == RCId)) { 138 NumberDeps++; 139 break; 140 } 141 } 142 } 143 return NumberDeps; 144 } 145 146 static unsigned numberCtrlDepsInSU(SUnit *SU) { 147 unsigned NumberDeps = 0; 148 for (const SDep &Succ : SU->Succs) 149 if (Succ.isCtrl()) 150 NumberDeps++; 151 152 return NumberDeps; 153 } 154 155 static unsigned numberCtrlPredInSU(SUnit *SU) { 156 unsigned NumberDeps = 0; 157 for (SDep &Pred : SU->Preds) 158 if (Pred.isCtrl()) 159 NumberDeps++; 160 161 return NumberDeps; 162 } 163 164 /// 165 /// Initialize nodes. 166 /// 167 void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) { 168 SUnits = &sunits; 169 NumNodesSolelyBlocking.resize(SUnits->size(), 0); 170 171 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 172 SUnit *SU = &(*SUnits)[i]; 173 initNumRegDefsLeft(SU); 174 SU->NodeQueueId = 0; 175 } 176 } 177 178 /// This heuristic is used if DFA scheduling is not desired 179 /// for some VLIW platform. 180 bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { 181 // The isScheduleHigh flag allows nodes with wraparound dependencies that 182 // cannot easily be modeled as edges with latencies to be scheduled as 183 // soon as possible in a top-down schedule. 184 if (LHS->isScheduleHigh && !RHS->isScheduleHigh) 185 return false; 186 187 if (!LHS->isScheduleHigh && RHS->isScheduleHigh) 188 return true; 189 190 unsigned LHSNum = LHS->NodeNum; 191 unsigned RHSNum = RHS->NodeNum; 192 193 // The most important heuristic is scheduling the critical path. 194 unsigned LHSLatency = PQ->getLatency(LHSNum); 195 unsigned RHSLatency = PQ->getLatency(RHSNum); 196 if (LHSLatency < RHSLatency) return true; 197 if (LHSLatency > RHSLatency) return false; 198 199 // After that, if two nodes have identical latencies, look to see if one will 200 // unblock more other nodes than the other. 201 unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); 202 unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); 203 if (LHSBlocked < RHSBlocked) return true; 204 if (LHSBlocked > RHSBlocked) return false; 205 206 // Finally, just to provide a stable ordering, use the node number as a 207 // deciding factor. 208 return LHSNum < RHSNum; 209 } 210 211 212 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor 213 /// of SU, return it, otherwise return null. 214 SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { 215 SUnit *OnlyAvailablePred = nullptr; 216 for (const SDep &Pred : SU->Preds) { 217 SUnit &PredSU = *Pred.getSUnit(); 218 if (!PredSU.isScheduled) { 219 // We found an available, but not scheduled, predecessor. If it's the 220 // only one we have found, keep track of it... otherwise give up. 221 if (OnlyAvailablePred && OnlyAvailablePred != &PredSU) 222 return nullptr; 223 OnlyAvailablePred = &PredSU; 224 } 225 } 226 return OnlyAvailablePred; 227 } 228 229 void ResourcePriorityQueue::push(SUnit *SU) { 230 // Look at all of the successors of this node. Count the number of nodes that 231 // this node is the sole unscheduled node for. 232 unsigned NumNodesBlocking = 0; 233 for (const SDep &Succ : SU->Succs) 234 if (getSingleUnscheduledPred(Succ.getSUnit()) == SU) 235 ++NumNodesBlocking; 236 237 NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; 238 Queue.push_back(SU); 239 } 240 241 /// Check if scheduling of this SU is possible 242 /// in the current packet. 243 bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { 244 if (!SU || !SU->getNode()) 245 return false; 246 247 // If this is a compound instruction, 248 // it is likely to be a call. Do not delay it. 249 if (SU->getNode()->getGluedNode()) 250 return true; 251 252 // First see if the pipeline could receive this instruction 253 // in the current cycle. 254 if (SU->getNode()->isMachineOpcode()) 255 switch (SU->getNode()->getMachineOpcode()) { 256 default: 257 if (!ResourcesModel->canReserveResources(&TII->get( 258 SU->getNode()->getMachineOpcode()))) 259 return false; 260 break; 261 case TargetOpcode::EXTRACT_SUBREG: 262 case TargetOpcode::INSERT_SUBREG: 263 case TargetOpcode::SUBREG_TO_REG: 264 case TargetOpcode::REG_SEQUENCE: 265 case TargetOpcode::IMPLICIT_DEF: 266 break; 267 } 268 269 // Now see if there are no other dependencies 270 // to instructions already in the packet. 271 for (unsigned i = 0, e = Packet.size(); i != e; ++i) 272 for (const SDep &Succ : Packet[i]->Succs) { 273 // Since we do not add pseudos to packets, might as well 274 // ignore order deps. 275 if (Succ.isCtrl()) 276 continue; 277 278 if (Succ.getSUnit() == SU) 279 return false; 280 } 281 282 return true; 283 } 284 285 /// Keep track of available resources. 286 void ResourcePriorityQueue::reserveResources(SUnit *SU) { 287 // If this SU does not fit in the packet 288 // start a new one. 289 if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) { 290 ResourcesModel->clearResources(); 291 Packet.clear(); 292 } 293 294 if (SU->getNode() && SU->getNode()->isMachineOpcode()) { 295 switch (SU->getNode()->getMachineOpcode()) { 296 default: 297 ResourcesModel->reserveResources(&TII->get( 298 SU->getNode()->getMachineOpcode())); 299 break; 300 case TargetOpcode::EXTRACT_SUBREG: 301 case TargetOpcode::INSERT_SUBREG: 302 case TargetOpcode::SUBREG_TO_REG: 303 case TargetOpcode::REG_SEQUENCE: 304 case TargetOpcode::IMPLICIT_DEF: 305 break; 306 } 307 Packet.push_back(SU); 308 } 309 // Forcefully end packet for PseudoOps. 310 else { 311 ResourcesModel->clearResources(); 312 Packet.clear(); 313 } 314 315 // If packet is now full, reset the state so in the next cycle 316 // we start fresh. 317 if (Packet.size() >= InstrItins->SchedModel.IssueWidth) { 318 ResourcesModel->clearResources(); 319 Packet.clear(); 320 } 321 } 322 323 int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { 324 int RegBalance = 0; 325 326 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) 327 return RegBalance; 328 329 // Gen estimate. 330 for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) { 331 MVT VT = SU->getNode()->getSimpleValueType(i); 332 if (TLI->isTypeLegal(VT) 333 && TLI->getRegClassFor(VT) 334 && TLI->getRegClassFor(VT)->getID() == RCId) 335 RegBalance += numberRCValSuccInSU(SU, RCId); 336 } 337 // Kill estimate. 338 for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) { 339 const SDValue &Op = SU->getNode()->getOperand(i); 340 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 341 if (isa<ConstantSDNode>(Op.getNode())) 342 continue; 343 344 if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT) 345 && TLI->getRegClassFor(VT)->getID() == RCId) 346 RegBalance -= numberRCValPredInSU(SU, RCId); 347 } 348 return RegBalance; 349 } 350 351 /// Estimates change in reg pressure from this SU. 352 /// It is achieved by trivial tracking of defined 353 /// and used vregs in dependent instructions. 354 /// The RawPressure flag makes this function to ignore 355 /// existing reg file sizes, and report raw def/use 356 /// balance. 357 int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { 358 int RegBalance = 0; 359 360 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) 361 return RegBalance; 362 363 if (RawPressure) { 364 for (const TargetRegisterClass *RC : TRI->regclasses()) 365 RegBalance += rawRegPressureDelta(SU, RC->getID()); 366 } 367 else { 368 for (const TargetRegisterClass *RC : TRI->regclasses()) { 369 if ((RegPressure[RC->getID()] + 370 rawRegPressureDelta(SU, RC->getID()) > 0) && 371 (RegPressure[RC->getID()] + 372 rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()])) 373 RegBalance += rawRegPressureDelta(SU, RC->getID()); 374 } 375 } 376 377 return RegBalance; 378 } 379 380 // Constants used to denote relative importance of 381 // heuristic components for cost computation. 382 static const unsigned PriorityOne = 200; 383 static const unsigned PriorityTwo = 50; 384 static const unsigned PriorityThree = 15; 385 static const unsigned PriorityFour = 5; 386 static const unsigned ScaleOne = 20; 387 static const unsigned ScaleTwo = 10; 388 static const unsigned ScaleThree = 5; 389 static const unsigned FactorOne = 2; 390 391 /// Returns single number reflecting benefit of scheduling SU 392 /// in the current cycle. 393 int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { 394 // Initial trivial priority. 395 int ResCount = 1; 396 397 // Do not waste time on a node that is already scheduled. 398 if (SU->isScheduled) 399 return ResCount; 400 401 // Forced priority is high. 402 if (SU->isScheduleHigh) 403 ResCount += PriorityOne; 404 405 // Adaptable scheduling 406 // A small, but very parallel 407 // region, where reg pressure is an issue. 408 if (HorizontalVerticalBalance > RegPressureThreshold) { 409 // Critical path first 410 ResCount += (SU->getHeight() * ScaleTwo); 411 // If resources are available for it, multiply the 412 // chance of scheduling. 413 if (isResourceAvailable(SU)) 414 ResCount <<= FactorOne; 415 416 // Consider change to reg pressure from scheduling 417 // this SU. 418 ResCount -= (regPressureDelta(SU,true) * ScaleOne); 419 } 420 // Default heuristic, greeady and 421 // critical path driven. 422 else { 423 // Critical path first. 424 ResCount += (SU->getHeight() * ScaleTwo); 425 // Now see how many instructions is blocked by this SU. 426 ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo); 427 // If resources are available for it, multiply the 428 // chance of scheduling. 429 if (isResourceAvailable(SU)) 430 ResCount <<= FactorOne; 431 432 ResCount -= (regPressureDelta(SU) * ScaleTwo); 433 } 434 435 // These are platform-specific things. 436 // Will need to go into the back end 437 // and accessed from here via a hook. 438 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { 439 if (N->isMachineOpcode()) { 440 const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); 441 if (TID.isCall()) 442 ResCount += (PriorityTwo + (ScaleThree*N->getNumValues())); 443 } 444 else 445 switch (N->getOpcode()) { 446 default: break; 447 case ISD::TokenFactor: 448 case ISD::CopyFromReg: 449 case ISD::CopyToReg: 450 ResCount += PriorityFour; 451 break; 452 453 case ISD::INLINEASM: 454 case ISD::INLINEASM_BR: 455 ResCount += PriorityThree; 456 break; 457 } 458 } 459 return ResCount; 460 } 461 462 463 /// Main resource tracking point. 464 void ResourcePriorityQueue::scheduledNode(SUnit *SU) { 465 // Use NULL entry as an event marker to reset 466 // the DFA state. 467 if (!SU) { 468 ResourcesModel->clearResources(); 469 Packet.clear(); 470 return; 471 } 472 473 const SDNode *ScegN = SU->getNode(); 474 // Update reg pressure tracking. 475 // First update current node. 476 if (ScegN->isMachineOpcode()) { 477 // Estimate generated regs. 478 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { 479 MVT VT = ScegN->getSimpleValueType(i); 480 481 if (TLI->isTypeLegal(VT)) { 482 const TargetRegisterClass *RC = TLI->getRegClassFor(VT); 483 if (RC) 484 RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID()); 485 } 486 } 487 // Estimate killed regs. 488 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { 489 const SDValue &Op = ScegN->getOperand(i); 490 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 491 492 if (TLI->isTypeLegal(VT)) { 493 const TargetRegisterClass *RC = TLI->getRegClassFor(VT); 494 if (RC) { 495 if (RegPressure[RC->getID()] > 496 (numberRCValPredInSU(SU, RC->getID()))) 497 RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID()); 498 else RegPressure[RC->getID()] = 0; 499 } 500 } 501 } 502 for (SDep &Pred : SU->Preds) { 503 if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0)) 504 continue; 505 --Pred.getSUnit()->NumRegDefsLeft; 506 } 507 } 508 509 // Reserve resources for this SU. 510 reserveResources(SU); 511 512 // Adjust number of parallel live ranges. 513 // Heuristic is simple - node with no data successors reduces 514 // number of live ranges. All others, increase it. 515 unsigned NumberNonControlDeps = 0; 516 517 for (const SDep &Succ : SU->Succs) { 518 adjustPriorityOfUnscheduledPreds(Succ.getSUnit()); 519 if (!Succ.isCtrl()) 520 NumberNonControlDeps++; 521 } 522 523 if (!NumberNonControlDeps) { 524 if (ParallelLiveRanges >= SU->NumPreds) 525 ParallelLiveRanges -= SU->NumPreds; 526 else 527 ParallelLiveRanges = 0; 528 529 } 530 else 531 ParallelLiveRanges += SU->NumRegDefsLeft; 532 533 // Track parallel live chains. 534 HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU)); 535 HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU)); 536 } 537 538 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) { 539 unsigned NodeNumDefs = 0; 540 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) 541 if (N->isMachineOpcode()) { 542 const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); 543 // No register need be allocated for this. 544 if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { 545 NodeNumDefs = 0; 546 break; 547 } 548 NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs()); 549 } 550 else 551 switch(N->getOpcode()) { 552 default: break; 553 case ISD::CopyFromReg: 554 NodeNumDefs++; 555 break; 556 case ISD::INLINEASM: 557 case ISD::INLINEASM_BR: 558 NodeNumDefs++; 559 break; 560 } 561 562 SU->NumRegDefsLeft = NodeNumDefs; 563 } 564 565 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just 566 /// scheduled. If SU is not itself available, then there is at least one 567 /// predecessor node that has not been scheduled yet. If SU has exactly ONE 568 /// unscheduled predecessor, we want to increase its priority: it getting 569 /// scheduled will make this node available, so it is better than some other 570 /// node of the same priority that will not make a node available. 571 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { 572 if (SU->isAvailable) return; // All preds scheduled. 573 574 SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); 575 if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) 576 return; 577 578 // Okay, we found a single predecessor that is available, but not scheduled. 579 // Since it is available, it must be in the priority queue. First remove it. 580 remove(OnlyAvailablePred); 581 582 // Reinsert the node into the priority queue, which recomputes its 583 // NumNodesSolelyBlocking value. 584 push(OnlyAvailablePred); 585 } 586 587 588 /// Main access point - returns next instructions 589 /// to be placed in scheduling sequence. 590 SUnit *ResourcePriorityQueue::pop() { 591 if (empty()) 592 return nullptr; 593 594 std::vector<SUnit *>::iterator Best = Queue.begin(); 595 if (!DisableDFASched) { 596 int BestCost = SUSchedulingCost(*Best); 597 for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) { 598 599 if (SUSchedulingCost(*I) > BestCost) { 600 BestCost = SUSchedulingCost(*I); 601 Best = I; 602 } 603 } 604 } 605 // Use default TD scheduling mechanism. 606 else { 607 for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) 608 if (Picker(*Best, *I)) 609 Best = I; 610 } 611 612 SUnit *V = *Best; 613 if (Best != std::prev(Queue.end())) 614 std::swap(*Best, Queue.back()); 615 616 Queue.pop_back(); 617 618 return V; 619 } 620 621 622 void ResourcePriorityQueue::remove(SUnit *SU) { 623 assert(!Queue.empty() && "Queue is empty!"); 624 std::vector<SUnit *>::iterator I = find(Queue, SU); 625 if (I != std::prev(Queue.end())) 626 std::swap(*I, Queue.back()); 627 628 Queue.pop_back(); 629 } 630