1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the ResourcePriorityQueue class, which is a 10 // SchedulingPriorityQueue that prioritizes instructions using DFA state to 11 // reduce the length of the critical path through the basic block 12 // on VLIW platforms. 13 // The scheduler is basically a top-down adaptable list scheduler with DFA 14 // resource tracking added to the cost function. 15 // DFA is queried as a state machine to model "packets/bundles" during 16 // schedule. Currently packets/bundles are discarded at the end of 17 // scheduling, affecting only order of instructions. 18 // 19 //===----------------------------------------------------------------------===// 20 21 #include "llvm/CodeGen/ResourcePriorityQueue.h" 22 #include "llvm/CodeGen/MachineInstr.h" 23 #include "llvm/CodeGen/SelectionDAGNodes.h" 24 #include "llvm/CodeGen/TargetLowering.h" 25 #include "llvm/CodeGen/TargetSubtargetInfo.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include "llvm/Target/TargetMachine.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "scheduler" 34 35 static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, 36 cl::ZeroOrMore, cl::init(false), 37 cl::desc("Disable use of DFA during scheduling")); 38 39 static cl::opt<int> RegPressureThreshold( 40 "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), 41 cl::desc("Track reg pressure and switch priority to in-depth")); 42 43 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) 44 : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) { 45 const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); 46 TRI = STI.getRegisterInfo(); 47 TLI = IS->TLI; 48 TII = STI.getInstrInfo(); 49 ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); 50 // This hard requirement could be relaxed, but for now 51 // do not let it proceed. 52 assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); 53 54 unsigned NumRC = TRI->getNumRegClasses(); 55 RegLimit.resize(NumRC); 56 RegPressure.resize(NumRC); 57 std::fill(RegLimit.begin(), RegLimit.end(), 0); 58 std::fill(RegPressure.begin(), RegPressure.end(), 0); 59 for (const TargetRegisterClass *RC : TRI->regclasses()) 60 RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF); 61 62 ParallelLiveRanges = 0; 63 HorizontalVerticalBalance = 0; 64 } 65 66 unsigned 67 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { 68 unsigned NumberDeps = 0; 69 for (SDep &Pred : SU->Preds) { 70 if (Pred.isCtrl()) 71 continue; 72 73 SUnit *PredSU = Pred.getSUnit(); 74 const SDNode *ScegN = PredSU->getNode(); 75 76 if (!ScegN) 77 continue; 78 79 // If value is passed to CopyToReg, it is probably 80 // live outside BB. 81 switch (ScegN->getOpcode()) { 82 default: break; 83 case ISD::TokenFactor: break; 84 case ISD::CopyFromReg: NumberDeps++; break; 85 case ISD::CopyToReg: break; 86 case ISD::INLINEASM: break; 87 case ISD::INLINEASM_BR: break; 88 } 89 if (!ScegN->isMachineOpcode()) 90 continue; 91 92 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { 93 MVT VT = ScegN->getSimpleValueType(i); 94 if (TLI->isTypeLegal(VT) 95 && (TLI->getRegClassFor(VT)->getID() == RCId)) { 96 NumberDeps++; 97 break; 98 } 99 } 100 } 101 return NumberDeps; 102 } 103 104 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, 105 unsigned RCId) { 106 unsigned NumberDeps = 0; 107 for (const SDep &Succ : SU->Succs) { 108 if (Succ.isCtrl()) 109 continue; 110 111 SUnit *SuccSU = Succ.getSUnit(); 112 const SDNode *ScegN = SuccSU->getNode(); 113 if (!ScegN) 114 continue; 115 116 // If value is passed to CopyToReg, it is probably 117 // live outside BB. 118 switch (ScegN->getOpcode()) { 119 default: break; 120 case ISD::TokenFactor: break; 121 case ISD::CopyFromReg: break; 122 case ISD::CopyToReg: NumberDeps++; break; 123 case ISD::INLINEASM: break; 124 case ISD::INLINEASM_BR: break; 125 } 126 if (!ScegN->isMachineOpcode()) 127 continue; 128 129 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { 130 const SDValue &Op = ScegN->getOperand(i); 131 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 132 if (TLI->isTypeLegal(VT) 133 && (TLI->getRegClassFor(VT)->getID() == RCId)) { 134 NumberDeps++; 135 break; 136 } 137 } 138 } 139 return NumberDeps; 140 } 141 142 static unsigned numberCtrlDepsInSU(SUnit *SU) { 143 unsigned NumberDeps = 0; 144 for (const SDep &Succ : SU->Succs) 145 if (Succ.isCtrl()) 146 NumberDeps++; 147 148 return NumberDeps; 149 } 150 151 static unsigned numberCtrlPredInSU(SUnit *SU) { 152 unsigned NumberDeps = 0; 153 for (SDep &Pred : SU->Preds) 154 if (Pred.isCtrl()) 155 NumberDeps++; 156 157 return NumberDeps; 158 } 159 160 /// 161 /// Initialize nodes. 162 /// 163 void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) { 164 SUnits = &sunits; 165 NumNodesSolelyBlocking.resize(SUnits->size(), 0); 166 167 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 168 SUnit *SU = &(*SUnits)[i]; 169 initNumRegDefsLeft(SU); 170 SU->NodeQueueId = 0; 171 } 172 } 173 174 /// This heuristic is used if DFA scheduling is not desired 175 /// for some VLIW platform. 176 bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { 177 // The isScheduleHigh flag allows nodes with wraparound dependencies that 178 // cannot easily be modeled as edges with latencies to be scheduled as 179 // soon as possible in a top-down schedule. 180 if (LHS->isScheduleHigh && !RHS->isScheduleHigh) 181 return false; 182 183 if (!LHS->isScheduleHigh && RHS->isScheduleHigh) 184 return true; 185 186 unsigned LHSNum = LHS->NodeNum; 187 unsigned RHSNum = RHS->NodeNum; 188 189 // The most important heuristic is scheduling the critical path. 190 unsigned LHSLatency = PQ->getLatency(LHSNum); 191 unsigned RHSLatency = PQ->getLatency(RHSNum); 192 if (LHSLatency < RHSLatency) return true; 193 if (LHSLatency > RHSLatency) return false; 194 195 // After that, if two nodes have identical latencies, look to see if one will 196 // unblock more other nodes than the other. 197 unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); 198 unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); 199 if (LHSBlocked < RHSBlocked) return true; 200 if (LHSBlocked > RHSBlocked) return false; 201 202 // Finally, just to provide a stable ordering, use the node number as a 203 // deciding factor. 204 return LHSNum < RHSNum; 205 } 206 207 208 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor 209 /// of SU, return it, otherwise return null. 210 SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { 211 SUnit *OnlyAvailablePred = nullptr; 212 for (const SDep &Pred : SU->Preds) { 213 SUnit &PredSU = *Pred.getSUnit(); 214 if (!PredSU.isScheduled) { 215 // We found an available, but not scheduled, predecessor. If it's the 216 // only one we have found, keep track of it... otherwise give up. 217 if (OnlyAvailablePred && OnlyAvailablePred != &PredSU) 218 return nullptr; 219 OnlyAvailablePred = &PredSU; 220 } 221 } 222 return OnlyAvailablePred; 223 } 224 225 void ResourcePriorityQueue::push(SUnit *SU) { 226 // Look at all of the successors of this node. Count the number of nodes that 227 // this node is the sole unscheduled node for. 228 unsigned NumNodesBlocking = 0; 229 for (const SDep &Succ : SU->Succs) 230 if (getSingleUnscheduledPred(Succ.getSUnit()) == SU) 231 ++NumNodesBlocking; 232 233 NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; 234 Queue.push_back(SU); 235 } 236 237 /// Check if scheduling of this SU is possible 238 /// in the current packet. 239 bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { 240 if (!SU || !SU->getNode()) 241 return false; 242 243 // If this is a compound instruction, 244 // it is likely to be a call. Do not delay it. 245 if (SU->getNode()->getGluedNode()) 246 return true; 247 248 // First see if the pipeline could receive this instruction 249 // in the current cycle. 250 if (SU->getNode()->isMachineOpcode()) 251 switch (SU->getNode()->getMachineOpcode()) { 252 default: 253 if (!ResourcesModel->canReserveResources(&TII->get( 254 SU->getNode()->getMachineOpcode()))) 255 return false; 256 break; 257 case TargetOpcode::EXTRACT_SUBREG: 258 case TargetOpcode::INSERT_SUBREG: 259 case TargetOpcode::SUBREG_TO_REG: 260 case TargetOpcode::REG_SEQUENCE: 261 case TargetOpcode::IMPLICIT_DEF: 262 break; 263 } 264 265 // Now see if there are no other dependencies 266 // to instructions already in the packet. 267 for (unsigned i = 0, e = Packet.size(); i != e; ++i) 268 for (const SDep &Succ : Packet[i]->Succs) { 269 // Since we do not add pseudos to packets, might as well 270 // ignore order deps. 271 if (Succ.isCtrl()) 272 continue; 273 274 if (Succ.getSUnit() == SU) 275 return false; 276 } 277 278 return true; 279 } 280 281 /// Keep track of available resources. 282 void ResourcePriorityQueue::reserveResources(SUnit *SU) { 283 // If this SU does not fit in the packet 284 // start a new one. 285 if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) { 286 ResourcesModel->clearResources(); 287 Packet.clear(); 288 } 289 290 if (SU->getNode() && SU->getNode()->isMachineOpcode()) { 291 switch (SU->getNode()->getMachineOpcode()) { 292 default: 293 ResourcesModel->reserveResources(&TII->get( 294 SU->getNode()->getMachineOpcode())); 295 break; 296 case TargetOpcode::EXTRACT_SUBREG: 297 case TargetOpcode::INSERT_SUBREG: 298 case TargetOpcode::SUBREG_TO_REG: 299 case TargetOpcode::REG_SEQUENCE: 300 case TargetOpcode::IMPLICIT_DEF: 301 break; 302 } 303 Packet.push_back(SU); 304 } 305 // Forcefully end packet for PseudoOps. 306 else { 307 ResourcesModel->clearResources(); 308 Packet.clear(); 309 } 310 311 // If packet is now full, reset the state so in the next cycle 312 // we start fresh. 313 if (Packet.size() >= InstrItins->SchedModel.IssueWidth) { 314 ResourcesModel->clearResources(); 315 Packet.clear(); 316 } 317 } 318 319 int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { 320 int RegBalance = 0; 321 322 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) 323 return RegBalance; 324 325 // Gen estimate. 326 for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) { 327 MVT VT = SU->getNode()->getSimpleValueType(i); 328 if (TLI->isTypeLegal(VT) 329 && TLI->getRegClassFor(VT) 330 && TLI->getRegClassFor(VT)->getID() == RCId) 331 RegBalance += numberRCValSuccInSU(SU, RCId); 332 } 333 // Kill estimate. 334 for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) { 335 const SDValue &Op = SU->getNode()->getOperand(i); 336 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 337 if (isa<ConstantSDNode>(Op.getNode())) 338 continue; 339 340 if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT) 341 && TLI->getRegClassFor(VT)->getID() == RCId) 342 RegBalance -= numberRCValPredInSU(SU, RCId); 343 } 344 return RegBalance; 345 } 346 347 /// Estimates change in reg pressure from this SU. 348 /// It is achieved by trivial tracking of defined 349 /// and used vregs in dependent instructions. 350 /// The RawPressure flag makes this function to ignore 351 /// existing reg file sizes, and report raw def/use 352 /// balance. 353 int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { 354 int RegBalance = 0; 355 356 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) 357 return RegBalance; 358 359 if (RawPressure) { 360 for (const TargetRegisterClass *RC : TRI->regclasses()) 361 RegBalance += rawRegPressureDelta(SU, RC->getID()); 362 } 363 else { 364 for (const TargetRegisterClass *RC : TRI->regclasses()) { 365 if ((RegPressure[RC->getID()] + 366 rawRegPressureDelta(SU, RC->getID()) > 0) && 367 (RegPressure[RC->getID()] + 368 rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()])) 369 RegBalance += rawRegPressureDelta(SU, RC->getID()); 370 } 371 } 372 373 return RegBalance; 374 } 375 376 // Constants used to denote relative importance of 377 // heuristic components for cost computation. 378 static const unsigned PriorityOne = 200; 379 static const unsigned PriorityTwo = 50; 380 static const unsigned PriorityThree = 15; 381 static const unsigned PriorityFour = 5; 382 static const unsigned ScaleOne = 20; 383 static const unsigned ScaleTwo = 10; 384 static const unsigned ScaleThree = 5; 385 static const unsigned FactorOne = 2; 386 387 /// Returns single number reflecting benefit of scheduling SU 388 /// in the current cycle. 389 int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { 390 // Initial trivial priority. 391 int ResCount = 1; 392 393 // Do not waste time on a node that is already scheduled. 394 if (SU->isScheduled) 395 return ResCount; 396 397 // Forced priority is high. 398 if (SU->isScheduleHigh) 399 ResCount += PriorityOne; 400 401 // Adaptable scheduling 402 // A small, but very parallel 403 // region, where reg pressure is an issue. 404 if (HorizontalVerticalBalance > RegPressureThreshold) { 405 // Critical path first 406 ResCount += (SU->getHeight() * ScaleTwo); 407 // If resources are available for it, multiply the 408 // chance of scheduling. 409 if (isResourceAvailable(SU)) 410 ResCount <<= FactorOne; 411 412 // Consider change to reg pressure from scheduling 413 // this SU. 414 ResCount -= (regPressureDelta(SU,true) * ScaleOne); 415 } 416 // Default heuristic, greeady and 417 // critical path driven. 418 else { 419 // Critical path first. 420 ResCount += (SU->getHeight() * ScaleTwo); 421 // Now see how many instructions is blocked by this SU. 422 ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo); 423 // If resources are available for it, multiply the 424 // chance of scheduling. 425 if (isResourceAvailable(SU)) 426 ResCount <<= FactorOne; 427 428 ResCount -= (regPressureDelta(SU) * ScaleTwo); 429 } 430 431 // These are platform-specific things. 432 // Will need to go into the back end 433 // and accessed from here via a hook. 434 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { 435 if (N->isMachineOpcode()) { 436 const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); 437 if (TID.isCall()) 438 ResCount += (PriorityTwo + (ScaleThree*N->getNumValues())); 439 } 440 else 441 switch (N->getOpcode()) { 442 default: break; 443 case ISD::TokenFactor: 444 case ISD::CopyFromReg: 445 case ISD::CopyToReg: 446 ResCount += PriorityFour; 447 break; 448 449 case ISD::INLINEASM: 450 case ISD::INLINEASM_BR: 451 ResCount += PriorityThree; 452 break; 453 } 454 } 455 return ResCount; 456 } 457 458 459 /// Main resource tracking point. 460 void ResourcePriorityQueue::scheduledNode(SUnit *SU) { 461 // Use NULL entry as an event marker to reset 462 // the DFA state. 463 if (!SU) { 464 ResourcesModel->clearResources(); 465 Packet.clear(); 466 return; 467 } 468 469 const SDNode *ScegN = SU->getNode(); 470 // Update reg pressure tracking. 471 // First update current node. 472 if (ScegN->isMachineOpcode()) { 473 // Estimate generated regs. 474 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { 475 MVT VT = ScegN->getSimpleValueType(i); 476 477 if (TLI->isTypeLegal(VT)) { 478 const TargetRegisterClass *RC = TLI->getRegClassFor(VT); 479 if (RC) 480 RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID()); 481 } 482 } 483 // Estimate killed regs. 484 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { 485 const SDValue &Op = ScegN->getOperand(i); 486 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 487 488 if (TLI->isTypeLegal(VT)) { 489 const TargetRegisterClass *RC = TLI->getRegClassFor(VT); 490 if (RC) { 491 if (RegPressure[RC->getID()] > 492 (numberRCValPredInSU(SU, RC->getID()))) 493 RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID()); 494 else RegPressure[RC->getID()] = 0; 495 } 496 } 497 } 498 for (SDep &Pred : SU->Preds) { 499 if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0)) 500 continue; 501 --Pred.getSUnit()->NumRegDefsLeft; 502 } 503 } 504 505 // Reserve resources for this SU. 506 reserveResources(SU); 507 508 // Adjust number of parallel live ranges. 509 // Heuristic is simple - node with no data successors reduces 510 // number of live ranges. All others, increase it. 511 unsigned NumberNonControlDeps = 0; 512 513 for (const SDep &Succ : SU->Succs) { 514 adjustPriorityOfUnscheduledPreds(Succ.getSUnit()); 515 if (!Succ.isCtrl()) 516 NumberNonControlDeps++; 517 } 518 519 if (!NumberNonControlDeps) { 520 if (ParallelLiveRanges >= SU->NumPreds) 521 ParallelLiveRanges -= SU->NumPreds; 522 else 523 ParallelLiveRanges = 0; 524 525 } 526 else 527 ParallelLiveRanges += SU->NumRegDefsLeft; 528 529 // Track parallel live chains. 530 HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU)); 531 HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU)); 532 } 533 534 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) { 535 unsigned NodeNumDefs = 0; 536 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) 537 if (N->isMachineOpcode()) { 538 const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); 539 // No register need be allocated for this. 540 if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { 541 NodeNumDefs = 0; 542 break; 543 } 544 NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs()); 545 } 546 else 547 switch(N->getOpcode()) { 548 default: break; 549 case ISD::CopyFromReg: 550 NodeNumDefs++; 551 break; 552 case ISD::INLINEASM: 553 case ISD::INLINEASM_BR: 554 NodeNumDefs++; 555 break; 556 } 557 558 SU->NumRegDefsLeft = NodeNumDefs; 559 } 560 561 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just 562 /// scheduled. If SU is not itself available, then there is at least one 563 /// predecessor node that has not been scheduled yet. If SU has exactly ONE 564 /// unscheduled predecessor, we want to increase its priority: it getting 565 /// scheduled will make this node available, so it is better than some other 566 /// node of the same priority that will not make a node available. 567 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { 568 if (SU->isAvailable) return; // All preds scheduled. 569 570 SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); 571 if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) 572 return; 573 574 // Okay, we found a single predecessor that is available, but not scheduled. 575 // Since it is available, it must be in the priority queue. First remove it. 576 remove(OnlyAvailablePred); 577 578 // Reinsert the node into the priority queue, which recomputes its 579 // NumNodesSolelyBlocking value. 580 push(OnlyAvailablePred); 581 } 582 583 584 /// Main access point - returns next instructions 585 /// to be placed in scheduling sequence. 586 SUnit *ResourcePriorityQueue::pop() { 587 if (empty()) 588 return nullptr; 589 590 std::vector<SUnit *>::iterator Best = Queue.begin(); 591 if (!DisableDFASched) { 592 int BestCost = SUSchedulingCost(*Best); 593 for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) { 594 595 if (SUSchedulingCost(*I) > BestCost) { 596 BestCost = SUSchedulingCost(*I); 597 Best = I; 598 } 599 } 600 } 601 // Use default TD scheduling mechanism. 602 else { 603 for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) 604 if (Picker(*Best, *I)) 605 Best = I; 606 } 607 608 SUnit *V = *Best; 609 if (Best != std::prev(Queue.end())) 610 std::swap(*Best, Queue.back()); 611 612 Queue.pop_back(); 613 614 return V; 615 } 616 617 618 void ResourcePriorityQueue::remove(SUnit *SU) { 619 assert(!Queue.empty() && "Queue is empty!"); 620 std::vector<SUnit *>::iterator I = find(Queue, SU); 621 if (I != std::prev(Queue.end())) 622 std::swap(*I, Queue.back()); 623 624 Queue.pop_back(); 625 } 626