1 //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This implements bottom-up and top-down register pressure reduction list 10 // schedulers, using standard algorithms. The basic approach uses a priority 11 // queue of available nodes to schedule. One at a time, nodes are taken from 12 // the priority queue (thus in priority order), checked for legality to 13 // schedule, and emitted if legal. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "ScheduleDAGSDNodes.h" 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/CodeGen/ISDOpcodes.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineOperand.h" 27 #include "llvm/CodeGen/Register.h" 28 #include "llvm/CodeGen/ScheduleDAG.h" 29 #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 30 #include "llvm/CodeGen/SchedulerRegistry.h" 31 #include "llvm/CodeGen/SelectionDAGISel.h" 32 #include "llvm/CodeGen/SelectionDAGNodes.h" 33 #include "llvm/CodeGen/TargetInstrInfo.h" 34 #include "llvm/CodeGen/TargetLowering.h" 35 #include "llvm/CodeGen/TargetOpcodes.h" 36 #include "llvm/CodeGen/TargetRegisterInfo.h" 37 #include "llvm/CodeGen/TargetSubtargetInfo.h" 38 #include "llvm/CodeGenTypes/MachineValueType.h" 39 #include "llvm/Config/llvm-config.h" 40 #include "llvm/IR/InlineAsm.h" 41 #include "llvm/MC/MCInstrDesc.h" 42 #include "llvm/MC/MCRegisterInfo.h" 43 #include "llvm/Support/Casting.h" 44 #include "llvm/Support/CodeGen.h" 45 #include "llvm/Support/CommandLine.h" 46 #include "llvm/Support/Compiler.h" 47 #include "llvm/Support/Debug.h" 48 #include "llvm/Support/ErrorHandling.h" 49 #include "llvm/Support/raw_ostream.h" 50 #include <algorithm> 51 #include <cassert> 52 #include <cstdint> 53 #include <cstdlib> 54 #include <iterator> 55 #include <limits> 56 #include <memory> 57 #include <utility> 58 #include <vector> 59 60 using namespace llvm; 61 62 #define DEBUG_TYPE "pre-RA-sched" 63 64 STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 65 STATISTIC(NumUnfolds, "Number of nodes unfolded"); 66 STATISTIC(NumDups, "Number of duplicated nodes"); 67 STATISTIC(NumPRCopies, "Number of physical register copies"); 68 69 static RegisterScheduler 70 burrListDAGScheduler("list-burr", 71 "Bottom-up register reduction list scheduling", 72 createBURRListDAGScheduler); 73 74 static RegisterScheduler 75 sourceListDAGScheduler("source", 76 "Similar to list-burr but schedules in source " 77 "order when possible", 78 createSourceListDAGScheduler); 79 80 static RegisterScheduler 81 hybridListDAGScheduler("list-hybrid", 82 "Bottom-up register pressure aware list scheduling " 83 "which tries to balance latency and register pressure", 84 createHybridListDAGScheduler); 85 86 static RegisterScheduler 87 ILPListDAGScheduler("list-ilp", 88 "Bottom-up register pressure aware list scheduling " 89 "which tries to balance ILP and register pressure", 90 createILPListDAGScheduler); 91 92 static cl::opt<bool> DisableSchedCycles( 93 "disable-sched-cycles", cl::Hidden, cl::init(false), 94 cl::desc("Disable cycle-level precision during preRA scheduling")); 95 96 // Temporary sched=list-ilp flags until the heuristics are robust. 97 // Some options are also available under sched=list-hybrid. 98 static cl::opt<bool> DisableSchedRegPressure( 99 "disable-sched-reg-pressure", cl::Hidden, cl::init(false), 100 cl::desc("Disable regpressure priority in sched=list-ilp")); 101 static cl::opt<bool> DisableSchedLiveUses( 102 "disable-sched-live-uses", cl::Hidden, cl::init(true), 103 cl::desc("Disable live use priority in sched=list-ilp")); 104 static cl::opt<bool> DisableSchedVRegCycle( 105 "disable-sched-vrcycle", cl::Hidden, cl::init(false), 106 cl::desc("Disable virtual register cycle interference checks")); 107 static cl::opt<bool> DisableSchedPhysRegJoin( 108 "disable-sched-physreg-join", cl::Hidden, cl::init(false), 109 cl::desc("Disable physreg def-use affinity")); 110 static cl::opt<bool> DisableSchedStalls( 111 "disable-sched-stalls", cl::Hidden, cl::init(true), 112 cl::desc("Disable no-stall priority in sched=list-ilp")); 113 static cl::opt<bool> DisableSchedCriticalPath( 114 "disable-sched-critical-path", cl::Hidden, cl::init(false), 115 cl::desc("Disable critical path priority in sched=list-ilp")); 116 static cl::opt<bool> DisableSchedHeight( 117 "disable-sched-height", cl::Hidden, cl::init(false), 118 cl::desc("Disable scheduled-height priority in sched=list-ilp")); 119 static cl::opt<bool> Disable2AddrHack( 120 "disable-2addr-hack", cl::Hidden, cl::init(true), 121 cl::desc("Disable scheduler's two-address hack")); 122 123 static cl::opt<int> MaxReorderWindow( 124 "max-sched-reorder", cl::Hidden, cl::init(6), 125 cl::desc("Number of instructions to allow ahead of the critical path " 126 "in sched=list-ilp")); 127 128 static cl::opt<unsigned> AvgIPC( 129 "sched-avg-ipc", cl::Hidden, cl::init(1), 130 cl::desc("Average inst/cycle whan no target itinerary exists.")); 131 132 namespace { 133 134 //===----------------------------------------------------------------------===// 135 /// ScheduleDAGRRList - The actual register reduction list scheduler 136 /// implementation. This supports both top-down and bottom-up scheduling. 137 /// 138 class ScheduleDAGRRList : public ScheduleDAGSDNodes { 139 private: 140 /// NeedLatency - True if the scheduler will make use of latency information. 141 bool NeedLatency; 142 143 /// AvailableQueue - The priority queue to use for the available SUnits. 144 SchedulingPriorityQueue *AvailableQueue; 145 146 /// PendingQueue - This contains all of the instructions whose operands have 147 /// been issued, but their results are not ready yet (due to the latency of 148 /// the operation). Once the operands becomes available, the instruction is 149 /// added to the AvailableQueue. 150 std::vector<SUnit *> PendingQueue; 151 152 /// HazardRec - The hazard recognizer to use. 153 ScheduleHazardRecognizer *HazardRec; 154 155 /// CurCycle - The current scheduler state corresponds to this cycle. 156 unsigned CurCycle = 0; 157 158 /// MinAvailableCycle - Cycle of the soonest available instruction. 159 unsigned MinAvailableCycle = ~0u; 160 161 /// IssueCount - Count instructions issued in this cycle 162 /// Currently valid only for bottom-up scheduling. 163 unsigned IssueCount = 0u; 164 165 /// LiveRegDefs - A set of physical registers and their definition 166 /// that are "live". These nodes must be scheduled before any other nodes that 167 /// modifies the registers can be scheduled. 168 unsigned NumLiveRegs = 0u; 169 std::unique_ptr<SUnit*[]> LiveRegDefs; 170 std::unique_ptr<SUnit*[]> LiveRegGens; 171 172 // Collect interferences between physical register use/defs. 173 // Each interference is an SUnit and set of physical registers. 174 SmallVector<SUnit*, 4> Interferences; 175 176 using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>; 177 178 LRegsMapT LRegsMap; 179 180 /// Topo - A topological ordering for SUnits which permits fast IsReachable 181 /// and similar queries. 182 ScheduleDAGTopologicalSort Topo; 183 184 // Hack to keep track of the inverse of FindCallSeqStart without more crazy 185 // DAG crawling. 186 DenseMap<SUnit*, SUnit*> CallSeqEndForStart; 187 188 public: 189 ScheduleDAGRRList(MachineFunction &mf, bool needlatency, 190 SchedulingPriorityQueue *availqueue, 191 CodeGenOptLevel OptLevel) 192 : ScheduleDAGSDNodes(mf), NeedLatency(needlatency), 193 AvailableQueue(availqueue), Topo(SUnits, nullptr) { 194 const TargetSubtargetInfo &STI = mf.getSubtarget(); 195 if (DisableSchedCycles || !NeedLatency) 196 HazardRec = new ScheduleHazardRecognizer(); 197 else 198 HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); 199 } 200 201 ~ScheduleDAGRRList() override { 202 delete HazardRec; 203 delete AvailableQueue; 204 } 205 206 void Schedule() override; 207 208 ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } 209 210 /// IsReachable - Checks if SU is reachable from TargetSU. 211 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 212 return Topo.IsReachable(SU, TargetSU); 213 } 214 215 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 216 /// create a cycle. 217 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 218 return Topo.WillCreateCycle(SU, TargetSU); 219 } 220 221 /// AddPredQueued - Queues and update to add a predecessor edge to SUnit SU. 222 /// This returns true if this is a new predecessor. 223 /// Does *NOT* update the topological ordering! It just queues an update. 224 void AddPredQueued(SUnit *SU, const SDep &D) { 225 Topo.AddPredQueued(SU, D.getSUnit()); 226 SU->addPred(D); 227 } 228 229 /// AddPred - adds a predecessor edge to SUnit SU. 230 /// This returns true if this is a new predecessor. 231 /// Updates the topological ordering if required. 232 void AddPred(SUnit *SU, const SDep &D) { 233 Topo.AddPred(SU, D.getSUnit()); 234 SU->addPred(D); 235 } 236 237 /// RemovePred - removes a predecessor edge from SUnit SU. 238 /// This returns true if an edge was removed. 239 /// Updates the topological ordering if required. 240 void RemovePred(SUnit *SU, const SDep &D) { 241 Topo.RemovePred(SU, D.getSUnit()); 242 SU->removePred(D); 243 } 244 245 private: 246 bool isReady(SUnit *SU) { 247 return DisableSchedCycles || !AvailableQueue->hasReadyFilter() || 248 AvailableQueue->isReady(SU); 249 } 250 251 void ReleasePred(SUnit *SU, const SDep *PredEdge); 252 void ReleasePredecessors(SUnit *SU); 253 void ReleasePending(); 254 void AdvanceToCycle(unsigned NextCycle); 255 void AdvancePastStalls(SUnit *SU); 256 void EmitNode(SUnit *SU); 257 void ScheduleNodeBottomUp(SUnit*); 258 void CapturePred(SDep *PredEdge); 259 void UnscheduleNodeBottomUp(SUnit*); 260 void RestoreHazardCheckerBottomUp(); 261 void BacktrackBottomUp(SUnit*, SUnit*); 262 SUnit *TryUnfoldSU(SUnit *); 263 SUnit *CopyAndMoveSuccessors(SUnit*); 264 void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 265 const TargetRegisterClass*, 266 const TargetRegisterClass*, 267 SmallVectorImpl<SUnit*>&); 268 bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); 269 270 void releaseInterferences(unsigned Reg = 0); 271 272 SUnit *PickNodeToScheduleBottomUp(); 273 void ListScheduleBottomUp(); 274 275 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 276 SUnit *CreateNewSUnit(SDNode *N) { 277 unsigned NumSUnits = SUnits.size(); 278 SUnit *NewNode = newSUnit(N); 279 // Update the topological ordering. 280 if (NewNode->NodeNum >= NumSUnits) 281 Topo.AddSUnitWithoutPredecessors(NewNode); 282 return NewNode; 283 } 284 285 /// CreateClone - Creates a new SUnit from an existing one. 286 SUnit *CreateClone(SUnit *N) { 287 unsigned NumSUnits = SUnits.size(); 288 SUnit *NewNode = Clone(N); 289 // Update the topological ordering. 290 if (NewNode->NodeNum >= NumSUnits) 291 Topo.AddSUnitWithoutPredecessors(NewNode); 292 return NewNode; 293 } 294 295 /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't 296 /// need actual latency information but the hybrid scheduler does. 297 bool forceUnitLatencies() const override { 298 return !NeedLatency; 299 } 300 }; 301 302 } // end anonymous namespace 303 304 static constexpr unsigned RegSequenceCost = 1; 305 306 /// GetCostForDef - Looks up the register class and cost for a given definition. 307 /// Typically this just means looking up the representative register class, 308 /// but for untyped values (MVT::Untyped) it means inspecting the node's 309 /// opcode to determine what register class is being generated. 310 static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, 311 const TargetLowering *TLI, 312 const TargetInstrInfo *TII, 313 const TargetRegisterInfo *TRI, 314 unsigned &RegClass, unsigned &Cost, 315 const MachineFunction &MF) { 316 MVT VT = RegDefPos.GetValue(); 317 318 // Special handling for untyped values. These values can only come from 319 // the expansion of custom DAG-to-DAG patterns. 320 if (VT == MVT::Untyped) { 321 const SDNode *Node = RegDefPos.GetNode(); 322 323 // Special handling for CopyFromReg of untyped values. 324 if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) { 325 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); 326 const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); 327 RegClass = RC->getID(); 328 Cost = 1; 329 return; 330 } 331 332 unsigned Opcode = Node->getMachineOpcode(); 333 if (Opcode == TargetOpcode::REG_SEQUENCE) { 334 unsigned DstRCIdx = Node->getConstantOperandVal(0); 335 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); 336 RegClass = RC->getID(); 337 Cost = RegSequenceCost; 338 return; 339 } 340 341 unsigned Idx = RegDefPos.GetIdx(); 342 const MCInstrDesc &Desc = TII->get(Opcode); 343 const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); 344 assert(RC && "Not a valid register class"); 345 RegClass = RC->getID(); 346 // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a 347 // better way to determine it. 348 Cost = 1; 349 } else { 350 RegClass = TLI->getRepRegClassFor(VT)->getID(); 351 Cost = TLI->getRepRegClassCostFor(VT); 352 } 353 } 354 355 /// Schedule - Schedule the DAG using list scheduling. 356 void ScheduleDAGRRList::Schedule() { 357 LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) 358 << " '" << BB->getName() << "' **********\n"); 359 360 CurCycle = 0; 361 IssueCount = 0; 362 MinAvailableCycle = 363 DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max(); 364 NumLiveRegs = 0; 365 // Allocate slots for each physical register, plus one for a special register 366 // to track the virtual resource of a calling sequence. 367 LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]()); 368 LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]()); 369 CallSeqEndForStart.clear(); 370 assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); 371 372 // Build the scheduling graph. 373 BuildSchedGraph(nullptr); 374 375 LLVM_DEBUG(dump()); 376 Topo.MarkDirty(); 377 378 AvailableQueue->initNodes(SUnits); 379 380 HazardRec->Reset(); 381 382 // Execute the actual scheduling loop. 383 ListScheduleBottomUp(); 384 385 AvailableQueue->releaseState(); 386 387 LLVM_DEBUG({ 388 dbgs() << "*** Final schedule ***\n"; 389 dumpSchedule(); 390 dbgs() << '\n'; 391 }); 392 } 393 394 //===----------------------------------------------------------------------===// 395 // Bottom-Up Scheduling 396 //===----------------------------------------------------------------------===// 397 398 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 399 /// the AvailableQueue if the count reaches zero. Also update its cycle bound. 400 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { 401 SUnit *PredSU = PredEdge->getSUnit(); 402 403 #ifndef NDEBUG 404 if (PredSU->NumSuccsLeft == 0) { 405 dbgs() << "*** Scheduling failed! ***\n"; 406 dumpNode(*PredSU); 407 dbgs() << " has been released too many times!\n"; 408 llvm_unreachable(nullptr); 409 } 410 #endif 411 --PredSU->NumSuccsLeft; 412 413 if (!forceUnitLatencies()) { 414 // Updating predecessor's height. This is now the cycle when the 415 // predecessor can be scheduled without causing a pipeline stall. 416 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); 417 } 418 419 // If all the node's successors are scheduled, this node is ready 420 // to be scheduled. Ignore the special EntrySU node. 421 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 422 PredSU->isAvailable = true; 423 424 unsigned Height = PredSU->getHeight(); 425 if (Height < MinAvailableCycle) 426 MinAvailableCycle = Height; 427 428 if (isReady(PredSU)) { 429 AvailableQueue->push(PredSU); 430 } 431 // CapturePred and others may have left the node in the pending queue, avoid 432 // adding it twice. 433 else if (!PredSU->isPending) { 434 PredSU->isPending = true; 435 PendingQueue.push_back(PredSU); 436 } 437 } 438 } 439 440 /// IsChainDependent - Test if Outer is reachable from Inner through 441 /// chain dependencies. 442 static bool IsChainDependent(SDNode *Outer, SDNode *Inner, 443 unsigned NestLevel, 444 const TargetInstrInfo *TII) { 445 SDNode *N = Outer; 446 while (true) { 447 if (N == Inner) 448 return true; 449 // For a TokenFactor, examine each operand. There may be multiple ways 450 // to get to the CALLSEQ_BEGIN, but we need to find the path with the 451 // most nesting in order to ensure that we find the corresponding match. 452 if (N->getOpcode() == ISD::TokenFactor) { 453 for (const SDValue &Op : N->op_values()) 454 if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII)) 455 return true; 456 return false; 457 } 458 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. 459 if (N->isMachineOpcode()) { 460 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { 461 ++NestLevel; 462 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { 463 if (NestLevel == 0) 464 return false; 465 --NestLevel; 466 } 467 } 468 // Otherwise, find the chain and continue climbing. 469 for (const SDValue &Op : N->op_values()) 470 if (Op.getValueType() == MVT::Other) { 471 N = Op.getNode(); 472 goto found_chain_operand; 473 } 474 return false; 475 found_chain_operand:; 476 if (N->getOpcode() == ISD::EntryToken) 477 return false; 478 } 479 } 480 481 /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate 482 /// the corresponding (lowered) CALLSEQ_BEGIN node. 483 /// 484 /// NestLevel and MaxNested are used in recursion to indcate the current level 485 /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum 486 /// level seen so far. 487 /// 488 /// TODO: It would be better to give CALLSEQ_END an explicit operand to point 489 /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. 490 static SDNode * 491 FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, 492 const TargetInstrInfo *TII) { 493 while (true) { 494 // For a TokenFactor, examine each operand. There may be multiple ways 495 // to get to the CALLSEQ_BEGIN, but we need to find the path with the 496 // most nesting in order to ensure that we find the corresponding match. 497 if (N->getOpcode() == ISD::TokenFactor) { 498 SDNode *Best = nullptr; 499 unsigned BestMaxNest = MaxNest; 500 for (const SDValue &Op : N->op_values()) { 501 unsigned MyNestLevel = NestLevel; 502 unsigned MyMaxNest = MaxNest; 503 if (SDNode *New = FindCallSeqStart(Op.getNode(), 504 MyNestLevel, MyMaxNest, TII)) 505 if (!Best || (MyMaxNest > BestMaxNest)) { 506 Best = New; 507 BestMaxNest = MyMaxNest; 508 } 509 } 510 assert(Best); 511 MaxNest = BestMaxNest; 512 return Best; 513 } 514 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. 515 if (N->isMachineOpcode()) { 516 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { 517 ++NestLevel; 518 MaxNest = std::max(MaxNest, NestLevel); 519 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { 520 assert(NestLevel != 0); 521 --NestLevel; 522 if (NestLevel == 0) 523 return N; 524 } 525 } 526 // Otherwise, find the chain and continue climbing. 527 for (const SDValue &Op : N->op_values()) 528 if (Op.getValueType() == MVT::Other) { 529 N = Op.getNode(); 530 goto found_chain_operand; 531 } 532 return nullptr; 533 found_chain_operand:; 534 if (N->getOpcode() == ISD::EntryToken) 535 return nullptr; 536 } 537 } 538 539 /// Call ReleasePred for each predecessor, then update register live def/gen. 540 /// Always update LiveRegDefs for a register dependence even if the current SU 541 /// also defines the register. This effectively create one large live range 542 /// across a sequence of two-address node. This is important because the 543 /// entire chain must be scheduled together. Example: 544 /// 545 /// flags = (3) add 546 /// flags = (2) addc flags 547 /// flags = (1) addc flags 548 /// 549 /// results in 550 /// 551 /// LiveRegDefs[flags] = 3 552 /// LiveRegGens[flags] = 1 553 /// 554 /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid 555 /// interference on flags. 556 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { 557 // Bottom up: release predecessors 558 for (SDep &Pred : SU->Preds) { 559 ReleasePred(SU, &Pred); 560 if (Pred.isAssignedRegDep()) { 561 // This is a physical register dependency and it's impossible or 562 // expensive to copy the register. Make sure nothing that can 563 // clobber the register is scheduled between the predecessor and 564 // this node. 565 SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef; 566 assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) && 567 "interference on register dependence"); 568 LiveRegDefs[Pred.getReg()] = Pred.getSUnit(); 569 if (!LiveRegGens[Pred.getReg()]) { 570 ++NumLiveRegs; 571 LiveRegGens[Pred.getReg()] = SU; 572 } 573 } 574 } 575 576 // If we're scheduling a lowered CALLSEQ_END, find the corresponding 577 // CALLSEQ_BEGIN. Inject an artificial physical register dependence between 578 // these nodes, to prevent other calls from being interscheduled with them. 579 unsigned CallResource = TRI->getNumRegs(); 580 if (!LiveRegDefs[CallResource]) 581 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) 582 if (Node->isMachineOpcode() && 583 Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { 584 unsigned NestLevel = 0; 585 unsigned MaxNest = 0; 586 SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); 587 assert(N && "Must find call sequence start"); 588 589 SUnit *Def = &SUnits[N->getNodeId()]; 590 CallSeqEndForStart[Def] = SU; 591 592 ++NumLiveRegs; 593 LiveRegDefs[CallResource] = Def; 594 LiveRegGens[CallResource] = SU; 595 break; 596 } 597 } 598 599 /// Check to see if any of the pending instructions are ready to issue. If 600 /// so, add them to the available queue. 601 void ScheduleDAGRRList::ReleasePending() { 602 if (DisableSchedCycles) { 603 assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); 604 return; 605 } 606 607 // If the available queue is empty, it is safe to reset MinAvailableCycle. 608 if (AvailableQueue->empty()) 609 MinAvailableCycle = std::numeric_limits<unsigned>::max(); 610 611 // Check to see if any of the pending instructions are ready to issue. If 612 // so, add them to the available queue. 613 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { 614 unsigned ReadyCycle = PendingQueue[i]->getHeight(); 615 if (ReadyCycle < MinAvailableCycle) 616 MinAvailableCycle = ReadyCycle; 617 618 if (PendingQueue[i]->isAvailable) { 619 if (!isReady(PendingQueue[i])) 620 continue; 621 AvailableQueue->push(PendingQueue[i]); 622 } 623 PendingQueue[i]->isPending = false; 624 PendingQueue[i] = PendingQueue.back(); 625 PendingQueue.pop_back(); 626 --i; --e; 627 } 628 } 629 630 /// Move the scheduler state forward by the specified number of Cycles. 631 void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { 632 if (NextCycle <= CurCycle) 633 return; 634 635 IssueCount = 0; 636 AvailableQueue->setCurCycle(NextCycle); 637 if (!HazardRec->isEnabled()) { 638 // Bypass lots of virtual calls in case of long latency. 639 CurCycle = NextCycle; 640 } 641 else { 642 for (; CurCycle != NextCycle; ++CurCycle) { 643 HazardRec->RecedeCycle(); 644 } 645 } 646 // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the 647 // available Q to release pending nodes at least once before popping. 648 ReleasePending(); 649 } 650 651 /// Move the scheduler state forward until the specified node's dependents are 652 /// ready and can be scheduled with no resource conflicts. 653 void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { 654 if (DisableSchedCycles) 655 return; 656 657 // FIXME: Nodes such as CopyFromReg probably should not advance the current 658 // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node 659 // has predecessors the cycle will be advanced when they are scheduled. 660 // But given the crude nature of modeling latency though such nodes, we 661 // currently need to treat these nodes like real instructions. 662 // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; 663 664 unsigned ReadyCycle = SU->getHeight(); 665 666 // Bump CurCycle to account for latency. We assume the latency of other 667 // available instructions may be hidden by the stall (not a full pipe stall). 668 // This updates the hazard recognizer's cycle before reserving resources for 669 // this instruction. 670 AdvanceToCycle(ReadyCycle); 671 672 // Calls are scheduled in their preceding cycle, so don't conflict with 673 // hazards from instructions after the call. EmitNode will reset the 674 // scoreboard state before emitting the call. 675 if (SU->isCall) 676 return; 677 678 // FIXME: For resource conflicts in very long non-pipelined stages, we 679 // should probably skip ahead here to avoid useless scoreboard checks. 680 int Stalls = 0; 681 while (true) { 682 ScheduleHazardRecognizer::HazardType HT = 683 HazardRec->getHazardType(SU, -Stalls); 684 685 if (HT == ScheduleHazardRecognizer::NoHazard) 686 break; 687 688 ++Stalls; 689 } 690 AdvanceToCycle(CurCycle + Stalls); 691 } 692 693 /// Record this SUnit in the HazardRecognizer. 694 /// Does not update CurCycle. 695 void ScheduleDAGRRList::EmitNode(SUnit *SU) { 696 if (!HazardRec->isEnabled()) 697 return; 698 699 // Check for phys reg copy. 700 if (!SU->getNode()) 701 return; 702 703 switch (SU->getNode()->getOpcode()) { 704 default: 705 assert(SU->getNode()->isMachineOpcode() && 706 "This target-independent node should not be scheduled."); 707 break; 708 case ISD::MERGE_VALUES: 709 case ISD::TokenFactor: 710 case ISD::LIFETIME_START: 711 case ISD::LIFETIME_END: 712 case ISD::CopyToReg: 713 case ISD::CopyFromReg: 714 case ISD::EH_LABEL: 715 // Noops don't affect the scoreboard state. Copies are likely to be 716 // removed. 717 return; 718 case ISD::INLINEASM: 719 case ISD::INLINEASM_BR: 720 // For inline asm, clear the pipeline state. 721 HazardRec->Reset(); 722 return; 723 } 724 if (SU->isCall) { 725 // Calls are scheduled with their preceding instructions. For bottom-up 726 // scheduling, clear the pipeline state before emitting. 727 HazardRec->Reset(); 728 } 729 730 HazardRec->EmitInstruction(SU); 731 } 732 733 static void resetVRegCycle(SUnit *SU); 734 735 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 736 /// count of its predecessors. If a predecessor pending count is zero, add it to 737 /// the Available queue. 738 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { 739 LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); 740 LLVM_DEBUG(dumpNode(*SU)); 741 742 #ifndef NDEBUG 743 if (CurCycle < SU->getHeight()) 744 LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight() 745 << "] pipeline stall!\n"); 746 #endif 747 748 // FIXME: Do not modify node height. It may interfere with 749 // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the 750 // node its ready cycle can aid heuristics, and after scheduling it can 751 // indicate the scheduled cycle. 752 SU->setHeightToAtLeast(CurCycle); 753 754 // Reserve resources for the scheduled instruction. 755 EmitNode(SU); 756 757 Sequence.push_back(SU); 758 759 AvailableQueue->scheduledNode(SU); 760 761 // If HazardRec is disabled, and each inst counts as one cycle, then 762 // advance CurCycle before ReleasePredecessors to avoid useless pushes to 763 // PendingQueue for schedulers that implement HasReadyFilter. 764 if (!HazardRec->isEnabled() && AvgIPC < 2) 765 AdvanceToCycle(CurCycle + 1); 766 767 // Update liveness of predecessors before successors to avoid treating a 768 // two-address node as a live range def. 769 ReleasePredecessors(SU); 770 771 // Release all the implicit physical register defs that are live. 772 for (SDep &Succ : SU->Succs) { 773 // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node. 774 if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) { 775 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 776 --NumLiveRegs; 777 LiveRegDefs[Succ.getReg()] = nullptr; 778 LiveRegGens[Succ.getReg()] = nullptr; 779 releaseInterferences(Succ.getReg()); 780 } 781 } 782 // Release the special call resource dependence, if this is the beginning 783 // of a call. 784 unsigned CallResource = TRI->getNumRegs(); 785 if (LiveRegDefs[CallResource] == SU) 786 for (const SDNode *SUNode = SU->getNode(); SUNode; 787 SUNode = SUNode->getGluedNode()) { 788 if (SUNode->isMachineOpcode() && 789 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { 790 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 791 --NumLiveRegs; 792 LiveRegDefs[CallResource] = nullptr; 793 LiveRegGens[CallResource] = nullptr; 794 releaseInterferences(CallResource); 795 } 796 } 797 798 resetVRegCycle(SU); 799 800 SU->isScheduled = true; 801 802 // Conditions under which the scheduler should eagerly advance the cycle: 803 // (1) No available instructions 804 // (2) All pipelines full, so available instructions must have hazards. 805 // 806 // If HazardRec is disabled, the cycle was pre-advanced before calling 807 // ReleasePredecessors. In that case, IssueCount should remain 0. 808 // 809 // Check AvailableQueue after ReleasePredecessors in case of zero latency. 810 if (HazardRec->isEnabled() || AvgIPC > 1) { 811 if (SU->getNode() && SU->getNode()->isMachineOpcode()) 812 ++IssueCount; 813 if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) 814 || (!HazardRec->isEnabled() && IssueCount == AvgIPC)) 815 AdvanceToCycle(CurCycle + 1); 816 } 817 } 818 819 /// CapturePred - This does the opposite of ReleasePred. Since SU is being 820 /// unscheduled, increase the succ left count of its predecessors. Remove 821 /// them from AvailableQueue if necessary. 822 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 823 SUnit *PredSU = PredEdge->getSUnit(); 824 if (PredSU->isAvailable) { 825 PredSU->isAvailable = false; 826 if (!PredSU->isPending) 827 AvailableQueue->remove(PredSU); 828 } 829 830 assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() && 831 "NumSuccsLeft will overflow!"); 832 ++PredSU->NumSuccsLeft; 833 } 834 835 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 836 /// its predecessor states to reflect the change. 837 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 838 LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); 839 LLVM_DEBUG(dumpNode(*SU)); 840 841 for (SDep &Pred : SU->Preds) { 842 CapturePred(&Pred); 843 if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){ 844 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 845 assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() && 846 "Physical register dependency violated?"); 847 --NumLiveRegs; 848 LiveRegDefs[Pred.getReg()] = nullptr; 849 LiveRegGens[Pred.getReg()] = nullptr; 850 releaseInterferences(Pred.getReg()); 851 } 852 } 853 854 // Reclaim the special call resource dependence, if this is the beginning 855 // of a call. 856 unsigned CallResource = TRI->getNumRegs(); 857 for (const SDNode *SUNode = SU->getNode(); SUNode; 858 SUNode = SUNode->getGluedNode()) { 859 if (SUNode->isMachineOpcode() && 860 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { 861 SUnit *SeqEnd = CallSeqEndForStart[SU]; 862 assert(SeqEnd && "Call sequence start/end must be known"); 863 assert(!LiveRegDefs[CallResource]); 864 assert(!LiveRegGens[CallResource]); 865 ++NumLiveRegs; 866 LiveRegDefs[CallResource] = SU; 867 LiveRegGens[CallResource] = SeqEnd; 868 } 869 } 870 871 // Release the special call resource dependence, if this is the end 872 // of a call. 873 if (LiveRegGens[CallResource] == SU) 874 for (const SDNode *SUNode = SU->getNode(); SUNode; 875 SUNode = SUNode->getGluedNode()) { 876 if (SUNode->isMachineOpcode() && 877 SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { 878 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 879 assert(LiveRegDefs[CallResource]); 880 assert(LiveRegGens[CallResource]); 881 --NumLiveRegs; 882 LiveRegDefs[CallResource] = nullptr; 883 LiveRegGens[CallResource] = nullptr; 884 releaseInterferences(CallResource); 885 } 886 } 887 888 for (auto &Succ : SU->Succs) { 889 if (Succ.isAssignedRegDep()) { 890 auto Reg = Succ.getReg(); 891 if (!LiveRegDefs[Reg]) 892 ++NumLiveRegs; 893 // This becomes the nearest def. Note that an earlier def may still be 894 // pending if this is a two-address node. 895 LiveRegDefs[Reg] = SU; 896 897 // Update LiveRegGen only if was empty before this unscheduling. 898 // This is to avoid incorrect updating LiveRegGen set in previous run. 899 if (!LiveRegGens[Reg]) { 900 // Find the successor with the lowest height. 901 LiveRegGens[Reg] = Succ.getSUnit(); 902 for (auto &Succ2 : SU->Succs) { 903 if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg && 904 Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight()) 905 LiveRegGens[Reg] = Succ2.getSUnit(); 906 } 907 } 908 } 909 } 910 if (SU->getHeight() < MinAvailableCycle) 911 MinAvailableCycle = SU->getHeight(); 912 913 SU->setHeightDirty(); 914 SU->isScheduled = false; 915 SU->isAvailable = true; 916 if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) { 917 // Don't make available until backtracking is complete. 918 SU->isPending = true; 919 PendingQueue.push_back(SU); 920 } 921 else { 922 AvailableQueue->push(SU); 923 } 924 AvailableQueue->unscheduledNode(SU); 925 } 926 927 /// After backtracking, the hazard checker needs to be restored to a state 928 /// corresponding the current cycle. 929 void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { 930 HazardRec->Reset(); 931 932 unsigned LookAhead = std::min((unsigned)Sequence.size(), 933 HazardRec->getMaxLookAhead()); 934 if (LookAhead == 0) 935 return; 936 937 std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead); 938 unsigned HazardCycle = (*I)->getHeight(); 939 for (auto E = Sequence.end(); I != E; ++I) { 940 SUnit *SU = *I; 941 for (; SU->getHeight() > HazardCycle; ++HazardCycle) { 942 HazardRec->RecedeCycle(); 943 } 944 EmitNode(SU); 945 } 946 } 947 948 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 949 /// BTCycle in order to schedule a specific node. 950 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { 951 SUnit *OldSU = Sequence.back(); 952 while (true) { 953 Sequence.pop_back(); 954 // FIXME: use ready cycle instead of height 955 CurCycle = OldSU->getHeight(); 956 UnscheduleNodeBottomUp(OldSU); 957 AvailableQueue->setCurCycle(CurCycle); 958 if (OldSU == BtSU) 959 break; 960 OldSU = Sequence.back(); 961 } 962 963 assert(!SU->isSucc(OldSU) && "Something is wrong!"); 964 965 RestoreHazardCheckerBottomUp(); 966 967 ReleasePending(); 968 969 ++NumBacktracks; 970 } 971 972 static bool isOperandOf(const SUnit *SU, SDNode *N) { 973 for (const SDNode *SUNode = SU->getNode(); SUNode; 974 SUNode = SUNode->getGluedNode()) { 975 if (SUNode->isOperandOf(N)) 976 return true; 977 } 978 return false; 979 } 980 981 /// TryUnfold - Attempt to unfold 982 SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { 983 SDNode *N = SU->getNode(); 984 // Use while over if to ease fall through. 985 SmallVector<SDNode *, 2> NewNodes; 986 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 987 return nullptr; 988 989 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 990 991 N = NewNodes[1]; 992 SDNode *LoadNode = NewNodes[0]; 993 unsigned NumVals = N->getNumValues(); 994 unsigned OldNumVals = SU->getNode()->getNumValues(); 995 996 // LoadNode may already exist. This can happen when there is another 997 // load from the same location and producing the same type of value 998 // but it has different alignment or volatileness. 999 bool isNewLoad = true; 1000 SUnit *LoadSU; 1001 if (LoadNode->getNodeId() != -1) { 1002 LoadSU = &SUnits[LoadNode->getNodeId()]; 1003 // If LoadSU has already been scheduled, we should clone it but 1004 // this would negate the benefit to unfolding so just return SU. 1005 if (LoadSU->isScheduled) 1006 return SU; 1007 isNewLoad = false; 1008 } else { 1009 LoadSU = CreateNewSUnit(LoadNode); 1010 LoadNode->setNodeId(LoadSU->NodeNum); 1011 1012 InitNumRegDefsLeft(LoadSU); 1013 computeLatency(LoadSU); 1014 } 1015 1016 bool isNewN = true; 1017 SUnit *NewSU; 1018 // This can only happen when isNewLoad is false. 1019 if (N->getNodeId() != -1) { 1020 NewSU = &SUnits[N->getNodeId()]; 1021 // If NewSU has already been scheduled, we need to clone it, but this 1022 // negates the benefit to unfolding so just return SU. 1023 if (NewSU->isScheduled) { 1024 return SU; 1025 } 1026 isNewN = false; 1027 } else { 1028 NewSU = CreateNewSUnit(N); 1029 N->setNodeId(NewSU->NodeNum); 1030 1031 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 1032 for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { 1033 if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { 1034 NewSU->isTwoAddress = true; 1035 break; 1036 } 1037 } 1038 if (MCID.isCommutable()) 1039 NewSU->isCommutable = true; 1040 1041 InitNumRegDefsLeft(NewSU); 1042 computeLatency(NewSU); 1043 } 1044 1045 LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); 1046 1047 // Now that we are committed to unfolding replace DAG Uses. 1048 for (unsigned i = 0; i != NumVals; ++i) 1049 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 1050 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1), 1051 SDValue(LoadNode, 1)); 1052 1053 // Record all the edges to and from the old SU, by category. 1054 SmallVector<SDep, 4> ChainPreds; 1055 SmallVector<SDep, 4> ChainSuccs; 1056 SmallVector<SDep, 4> LoadPreds; 1057 SmallVector<SDep, 4> NodePreds; 1058 SmallVector<SDep, 4> NodeSuccs; 1059 for (SDep &Pred : SU->Preds) { 1060 if (Pred.isCtrl()) 1061 ChainPreds.push_back(Pred); 1062 else if (isOperandOf(Pred.getSUnit(), LoadNode)) 1063 LoadPreds.push_back(Pred); 1064 else 1065 NodePreds.push_back(Pred); 1066 } 1067 for (SDep &Succ : SU->Succs) { 1068 if (Succ.isCtrl()) 1069 ChainSuccs.push_back(Succ); 1070 else 1071 NodeSuccs.push_back(Succ); 1072 } 1073 1074 // Now assign edges to the newly-created nodes. 1075 for (const SDep &Pred : ChainPreds) { 1076 RemovePred(SU, Pred); 1077 if (isNewLoad) 1078 AddPredQueued(LoadSU, Pred); 1079 } 1080 for (const SDep &Pred : LoadPreds) { 1081 RemovePred(SU, Pred); 1082 if (isNewLoad) 1083 AddPredQueued(LoadSU, Pred); 1084 } 1085 for (const SDep &Pred : NodePreds) { 1086 RemovePred(SU, Pred); 1087 AddPredQueued(NewSU, Pred); 1088 } 1089 for (SDep &D : NodeSuccs) { 1090 SUnit *SuccDep = D.getSUnit(); 1091 D.setSUnit(SU); 1092 RemovePred(SuccDep, D); 1093 D.setSUnit(NewSU); 1094 AddPredQueued(SuccDep, D); 1095 // Balance register pressure. 1096 if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled && 1097 !D.isCtrl() && NewSU->NumRegDefsLeft > 0) 1098 --NewSU->NumRegDefsLeft; 1099 } 1100 for (SDep &D : ChainSuccs) { 1101 SUnit *SuccDep = D.getSUnit(); 1102 D.setSUnit(SU); 1103 RemovePred(SuccDep, D); 1104 if (isNewLoad) { 1105 D.setSUnit(LoadSU); 1106 AddPredQueued(SuccDep, D); 1107 } 1108 } 1109 1110 // Add a data dependency to reflect that NewSU reads the value defined 1111 // by LoadSU. 1112 SDep D(LoadSU, SDep::Data, 0); 1113 D.setLatency(LoadSU->Latency); 1114 AddPredQueued(NewSU, D); 1115 1116 if (isNewLoad) 1117 AvailableQueue->addNode(LoadSU); 1118 if (isNewN) 1119 AvailableQueue->addNode(NewSU); 1120 1121 ++NumUnfolds; 1122 1123 if (NewSU->NumSuccsLeft == 0) 1124 NewSU->isAvailable = true; 1125 1126 return NewSU; 1127 } 1128 1129 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 1130 /// successors to the newly created node. 1131 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 1132 SDNode *N = SU->getNode(); 1133 if (!N) 1134 return nullptr; 1135 1136 LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n"); 1137 LLVM_DEBUG(dumpNode(*SU)); 1138 1139 if (N->getGluedNode() && 1140 !TII->canCopyGluedNodeDuringSchedule(N)) { 1141 LLVM_DEBUG( 1142 dbgs() 1143 << "Giving up because it has incoming glue and the target does not " 1144 "want to copy it\n"); 1145 return nullptr; 1146 } 1147 1148 SUnit *NewSU; 1149 bool TryUnfold = false; 1150 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 1151 MVT VT = N->getSimpleValueType(i); 1152 if (VT == MVT::Glue) { 1153 LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n"); 1154 return nullptr; 1155 } else if (VT == MVT::Other) 1156 TryUnfold = true; 1157 } 1158 for (const SDValue &Op : N->op_values()) { 1159 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 1160 if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) { 1161 LLVM_DEBUG( 1162 dbgs() << "Giving up because it one of the operands is glue and " 1163 "the target does not want to copy it\n"); 1164 return nullptr; 1165 } 1166 } 1167 1168 // If possible unfold instruction. 1169 if (TryUnfold) { 1170 SUnit *UnfoldSU = TryUnfoldSU(SU); 1171 if (!UnfoldSU) 1172 return nullptr; 1173 SU = UnfoldSU; 1174 N = SU->getNode(); 1175 // If this can be scheduled don't bother duplicating and just return 1176 if (SU->NumSuccsLeft == 0) 1177 return SU; 1178 } 1179 1180 LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); 1181 NewSU = CreateClone(SU); 1182 1183 // New SUnit has the exact same predecessors. 1184 for (SDep &Pred : SU->Preds) 1185 if (!Pred.isArtificial()) 1186 AddPredQueued(NewSU, Pred); 1187 1188 // Make sure the clone comes after the original. (InstrEmitter assumes 1189 // this ordering.) 1190 AddPredQueued(NewSU, SDep(SU, SDep::Artificial)); 1191 1192 // Only copy scheduled successors. Cut them from old node's successor 1193 // list and move them over. 1194 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 1195 for (SDep &Succ : SU->Succs) { 1196 if (Succ.isArtificial()) 1197 continue; 1198 SUnit *SuccSU = Succ.getSUnit(); 1199 if (SuccSU->isScheduled) { 1200 SDep D = Succ; 1201 D.setSUnit(NewSU); 1202 AddPredQueued(SuccSU, D); 1203 D.setSUnit(SU); 1204 DelDeps.emplace_back(SuccSU, D); 1205 } 1206 } 1207 for (const auto &[DelSU, DelD] : DelDeps) 1208 RemovePred(DelSU, DelD); 1209 1210 AvailableQueue->updateNode(SU); 1211 AvailableQueue->addNode(NewSU); 1212 1213 ++NumDups; 1214 return NewSU; 1215 } 1216 1217 /// InsertCopiesAndMoveSuccs - Insert register copies and move all 1218 /// scheduled successors of the given SUnit to the last copy. 1219 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 1220 const TargetRegisterClass *DestRC, 1221 const TargetRegisterClass *SrcRC, 1222 SmallVectorImpl<SUnit*> &Copies) { 1223 SUnit *CopyFromSU = CreateNewSUnit(nullptr); 1224 CopyFromSU->CopySrcRC = SrcRC; 1225 CopyFromSU->CopyDstRC = DestRC; 1226 1227 SUnit *CopyToSU = CreateNewSUnit(nullptr); 1228 CopyToSU->CopySrcRC = DestRC; 1229 CopyToSU->CopyDstRC = SrcRC; 1230 1231 // Only copy scheduled successors. Cut them from old node's successor 1232 // list and move them over. 1233 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 1234 for (SDep &Succ : SU->Succs) { 1235 if (Succ.isArtificial()) 1236 continue; 1237 SUnit *SuccSU = Succ.getSUnit(); 1238 if (SuccSU->isScheduled) { 1239 SDep D = Succ; 1240 D.setSUnit(CopyToSU); 1241 AddPredQueued(SuccSU, D); 1242 DelDeps.emplace_back(SuccSU, Succ); 1243 } 1244 else { 1245 // Avoid scheduling the def-side copy before other successors. Otherwise, 1246 // we could introduce another physreg interference on the copy and 1247 // continue inserting copies indefinitely. 1248 AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial)); 1249 } 1250 } 1251 for (const auto &[DelSU, DelD] : DelDeps) 1252 RemovePred(DelSU, DelD); 1253 1254 SDep FromDep(SU, SDep::Data, Reg); 1255 FromDep.setLatency(SU->Latency); 1256 AddPredQueued(CopyFromSU, FromDep); 1257 SDep ToDep(CopyFromSU, SDep::Data, 0); 1258 ToDep.setLatency(CopyFromSU->Latency); 1259 AddPredQueued(CopyToSU, ToDep); 1260 1261 AvailableQueue->updateNode(SU); 1262 AvailableQueue->addNode(CopyFromSU); 1263 AvailableQueue->addNode(CopyToSU); 1264 Copies.push_back(CopyFromSU); 1265 Copies.push_back(CopyToSU); 1266 1267 ++NumPRCopies; 1268 } 1269 1270 /// getPhysicalRegisterVT - Returns the ValueType of the physical register 1271 /// definition of the specified node. 1272 /// FIXME: Move to SelectionDAG? 1273 static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 1274 const TargetInstrInfo *TII) { 1275 unsigned NumRes; 1276 if (N->getOpcode() == ISD::CopyFromReg) { 1277 // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. 1278 NumRes = 1; 1279 } else { 1280 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 1281 assert(!MCID.implicit_defs().empty() && 1282 "Physical reg def must be in implicit def list!"); 1283 NumRes = MCID.getNumDefs(); 1284 for (MCPhysReg ImpDef : MCID.implicit_defs()) { 1285 if (Reg == ImpDef) 1286 break; 1287 ++NumRes; 1288 } 1289 } 1290 return N->getSimpleValueType(NumRes); 1291 } 1292 1293 /// CheckForLiveRegDef - Return true and update live register vector if the 1294 /// specified register def of the specified SUnit clobbers any "live" registers. 1295 static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs, 1296 SmallSet<unsigned, 4> &RegAdded, 1297 SmallVectorImpl<unsigned> &LRegs, 1298 const TargetRegisterInfo *TRI, 1299 const SDNode *Node = nullptr) { 1300 for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { 1301 1302 // Check if Ref is live. 1303 if (!LiveRegDefs[*AliasI]) continue; 1304 1305 // Allow multiple uses of the same def. 1306 if (LiveRegDefs[*AliasI] == SU) continue; 1307 1308 // Allow multiple uses of same def 1309 if (Node && LiveRegDefs[*AliasI]->getNode() == Node) 1310 continue; 1311 1312 // Add Reg to the set of interfering live regs. 1313 if (RegAdded.insert(*AliasI).second) { 1314 LRegs.push_back(*AliasI); 1315 } 1316 } 1317 } 1318 1319 /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered 1320 /// by RegMask, and add them to LRegs. 1321 static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, 1322 ArrayRef<SUnit*> LiveRegDefs, 1323 SmallSet<unsigned, 4> &RegAdded, 1324 SmallVectorImpl<unsigned> &LRegs) { 1325 // Look at all live registers. Skip Reg0 and the special CallResource. 1326 for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { 1327 if (!LiveRegDefs[i]) continue; 1328 if (LiveRegDefs[i] == SU) continue; 1329 if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; 1330 if (RegAdded.insert(i).second) 1331 LRegs.push_back(i); 1332 } 1333 } 1334 1335 /// getNodeRegMask - Returns the register mask attached to an SDNode, if any. 1336 static const uint32_t *getNodeRegMask(const SDNode *N) { 1337 for (const SDValue &Op : N->op_values()) 1338 if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode())) 1339 return RegOp->getRegMask(); 1340 return nullptr; 1341 } 1342 1343 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 1344 /// scheduling of the given node to satisfy live physical register dependencies. 1345 /// If the specific node is the last one that's available to schedule, do 1346 /// whatever is necessary (i.e. backtracking or cloning) to make it possible. 1347 bool ScheduleDAGRRList:: 1348 DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { 1349 if (NumLiveRegs == 0) 1350 return false; 1351 1352 SmallSet<unsigned, 4> RegAdded; 1353 // If this node would clobber any "live" register, then it's not ready. 1354 // 1355 // If SU is the currently live definition of the same register that it uses, 1356 // then we are free to schedule it. 1357 for (SDep &Pred : SU->Preds) { 1358 if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU) 1359 CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(), 1360 RegAdded, LRegs, TRI); 1361 } 1362 1363 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { 1364 if (Node->getOpcode() == ISD::INLINEASM || 1365 Node->getOpcode() == ISD::INLINEASM_BR) { 1366 // Inline asm can clobber physical defs. 1367 unsigned NumOps = Node->getNumOperands(); 1368 if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) 1369 --NumOps; // Ignore the glue operand. 1370 1371 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { 1372 unsigned Flags = Node->getConstantOperandVal(i); 1373 const InlineAsm::Flag F(Flags); 1374 unsigned NumVals = F.getNumOperandRegisters(); 1375 1376 ++i; // Skip the ID value. 1377 if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || 1378 F.isClobberKind()) { 1379 // Check for def of register or earlyclobber register. 1380 for (; NumVals; --NumVals, ++i) { 1381 Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 1382 if (Reg.isPhysical()) 1383 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); 1384 } 1385 } else 1386 i += NumVals; 1387 } 1388 continue; 1389 } 1390 1391 if (Node->getOpcode() == ISD::CopyToReg) { 1392 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); 1393 if (Reg.isPhysical()) { 1394 SDNode *SrcNode = Node->getOperand(2).getNode(); 1395 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI, 1396 SrcNode); 1397 } 1398 } 1399 1400 if (!Node->isMachineOpcode()) 1401 continue; 1402 // If we're in the middle of scheduling a call, don't begin scheduling 1403 // another call. Also, don't allow any physical registers to be live across 1404 // the call. 1405 if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { 1406 // Check the special calling-sequence resource. 1407 unsigned CallResource = TRI->getNumRegs(); 1408 if (LiveRegDefs[CallResource]) { 1409 SDNode *Gen = LiveRegGens[CallResource]->getNode(); 1410 while (SDNode *Glued = Gen->getGluedNode()) 1411 Gen = Glued; 1412 if (!IsChainDependent(Gen, Node, 0, TII) && 1413 RegAdded.insert(CallResource).second) 1414 LRegs.push_back(CallResource); 1415 } 1416 } 1417 if (const uint32_t *RegMask = getNodeRegMask(Node)) 1418 CheckForLiveRegDefMasked(SU, RegMask, 1419 ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()), 1420 RegAdded, LRegs); 1421 1422 const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); 1423 if (MCID.hasOptionalDef()) { 1424 // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit. 1425 // This operand can be either a def of CPSR, if the S bit is set; or a use 1426 // of %noreg. When the OptionalDef is set to a valid register, we need to 1427 // handle it in the same way as an ImplicitDef. 1428 for (unsigned i = 0; i < MCID.getNumDefs(); ++i) 1429 if (MCID.operands()[i].isOptionalDef()) { 1430 const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues()); 1431 Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg(); 1432 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); 1433 } 1434 } 1435 for (MCPhysReg Reg : MCID.implicit_defs()) 1436 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); 1437 } 1438 1439 return !LRegs.empty(); 1440 } 1441 1442 void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { 1443 // Add the nodes that aren't ready back onto the available list. 1444 for (unsigned i = Interferences.size(); i > 0; --i) { 1445 SUnit *SU = Interferences[i-1]; 1446 LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); 1447 if (Reg) { 1448 SmallVectorImpl<unsigned> &LRegs = LRegsPos->second; 1449 if (!is_contained(LRegs, Reg)) 1450 continue; 1451 } 1452 SU->isPending = false; 1453 // The interfering node may no longer be available due to backtracking. 1454 // Furthermore, it may have been made available again, in which case it is 1455 // now already in the AvailableQueue. 1456 if (SU->isAvailable && !SU->NodeQueueId) { 1457 LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); 1458 AvailableQueue->push(SU); 1459 } 1460 if (i < Interferences.size()) 1461 Interferences[i-1] = Interferences.back(); 1462 Interferences.pop_back(); 1463 LRegsMap.erase(LRegsPos); 1464 } 1465 } 1466 1467 /// Return a node that can be scheduled in this cycle. Requirements: 1468 /// (1) Ready: latency has been satisfied 1469 /// (2) No Hazards: resources are available 1470 /// (3) No Interferences: may unschedule to break register interferences. 1471 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { 1472 SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop(); 1473 auto FindAvailableNode = [&]() { 1474 while (CurSU) { 1475 SmallVector<unsigned, 4> LRegs; 1476 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 1477 break; 1478 LLVM_DEBUG(dbgs() << " Interfering reg "; 1479 if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource"; 1480 else dbgs() << printReg(LRegs[0], TRI); 1481 dbgs() << " SU #" << CurSU->NodeNum << '\n'); 1482 auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs); 1483 if (LRegsInserted) { 1484 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 1485 Interferences.push_back(CurSU); 1486 } 1487 else { 1488 assert(CurSU->isPending && "Interferences are pending"); 1489 // Update the interference with current live regs. 1490 LRegsIter->second = LRegs; 1491 } 1492 CurSU = AvailableQueue->pop(); 1493 } 1494 }; 1495 FindAvailableNode(); 1496 if (CurSU) 1497 return CurSU; 1498 1499 // We query the topological order in the loop body, so make sure outstanding 1500 // updates are applied before entering it (we only enter the loop if there 1501 // are some interferences). If we make changes to the ordering, we exit 1502 // the loop. 1503 1504 // All candidates are delayed due to live physical reg dependencies. 1505 // Try backtracking, code duplication, or inserting cross class copies 1506 // to resolve it. 1507 for (SUnit *TrySU : Interferences) { 1508 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; 1509 1510 // Try unscheduling up to the point where it's safe to schedule 1511 // this node. 1512 SUnit *BtSU = nullptr; 1513 unsigned LiveCycle = std::numeric_limits<unsigned>::max(); 1514 for (unsigned Reg : LRegs) { 1515 if (LiveRegGens[Reg]->getHeight() < LiveCycle) { 1516 BtSU = LiveRegGens[Reg]; 1517 LiveCycle = BtSU->getHeight(); 1518 } 1519 } 1520 if (!WillCreateCycle(TrySU, BtSU)) { 1521 // BacktrackBottomUp mutates Interferences! 1522 BacktrackBottomUp(TrySU, BtSU); 1523 1524 // Force the current node to be scheduled before the node that 1525 // requires the physical reg dep. 1526 if (BtSU->isAvailable) { 1527 BtSU->isAvailable = false; 1528 if (!BtSU->isPending) 1529 AvailableQueue->remove(BtSU); 1530 } 1531 LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum 1532 << ") to SU(" << TrySU->NodeNum << ")\n"); 1533 AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial)); 1534 1535 // If one or more successors has been unscheduled, then the current 1536 // node is no longer available. 1537 if (!TrySU->isAvailable || !TrySU->NodeQueueId) { 1538 LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n"); 1539 CurSU = AvailableQueue->pop(); 1540 } else { 1541 LLVM_DEBUG(dbgs() << "TrySU available\n"); 1542 // Available and in AvailableQueue 1543 AvailableQueue->remove(TrySU); 1544 CurSU = TrySU; 1545 } 1546 FindAvailableNode(); 1547 // Interferences has been mutated. We must break. 1548 break; 1549 } 1550 } 1551 1552 if (!CurSU) { 1553 // Can't backtrack. If it's too expensive to copy the value, then try 1554 // duplicate the nodes that produces these "too expensive to copy" 1555 // values to break the dependency. In case even that doesn't work, 1556 // insert cross class copies. 1557 // If it's not too expensive, i.e. cost != -1, issue copies. 1558 SUnit *TrySU = Interferences[0]; 1559 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; 1560 assert(LRegs.size() == 1 && "Can't handle this yet!"); 1561 unsigned Reg = LRegs[0]; 1562 SUnit *LRDef = LiveRegDefs[Reg]; 1563 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 1564 const TargetRegisterClass *RC = 1565 TRI->getMinimalPhysRegClass(Reg, VT); 1566 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 1567 1568 // If cross copy register class is the same as RC, then it must be possible 1569 // copy the value directly. Do not try duplicate the def. 1570 // If cross copy register class is not the same as RC, then it's possible to 1571 // copy the value but it require cross register class copies and it is 1572 // expensive. 1573 // If cross copy register class is null, then it's not possible to copy 1574 // the value at all. 1575 SUnit *NewDef = nullptr; 1576 if (DestRC != RC) { 1577 NewDef = CopyAndMoveSuccessors(LRDef); 1578 if (!DestRC && !NewDef) 1579 report_fatal_error("Can't handle live physical register dependency!"); 1580 } 1581 if (!NewDef) { 1582 // Issue copies, these can be expensive cross register class copies. 1583 SmallVector<SUnit*, 2> Copies; 1584 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 1585 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum 1586 << " to SU #" << Copies.front()->NodeNum << "\n"); 1587 AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial)); 1588 NewDef = Copies.back(); 1589 } 1590 1591 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum 1592 << " to SU #" << TrySU->NodeNum << "\n"); 1593 LiveRegDefs[Reg] = NewDef; 1594 AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial)); 1595 TrySU->isAvailable = false; 1596 CurSU = NewDef; 1597 } 1598 assert(CurSU && "Unable to resolve live physical register dependencies!"); 1599 return CurSU; 1600 } 1601 1602 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 1603 /// schedulers. 1604 void ScheduleDAGRRList::ListScheduleBottomUp() { 1605 // Release any predecessors of the special Exit node. 1606 ReleasePredecessors(&ExitSU); 1607 1608 // Add root to Available queue. 1609 if (!SUnits.empty()) { 1610 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 1611 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 1612 RootSU->isAvailable = true; 1613 AvailableQueue->push(RootSU); 1614 } 1615 1616 // While Available queue is not empty, grab the node with the highest 1617 // priority. If it is not ready put it back. Schedule the node. 1618 Sequence.reserve(SUnits.size()); 1619 while (!AvailableQueue->empty() || !Interferences.empty()) { 1620 LLVM_DEBUG(dbgs() << "\nExamining Available:\n"; 1621 AvailableQueue->dump(this)); 1622 1623 // Pick the best node to schedule taking all constraints into 1624 // consideration. 1625 SUnit *SU = PickNodeToScheduleBottomUp(); 1626 1627 AdvancePastStalls(SU); 1628 1629 ScheduleNodeBottomUp(SU); 1630 1631 while (AvailableQueue->empty() && !PendingQueue.empty()) { 1632 // Advance the cycle to free resources. Skip ahead to the next ready SU. 1633 assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() && 1634 "MinAvailableCycle uninitialized"); 1635 AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); 1636 } 1637 } 1638 1639 // Reverse the order if it is bottom up. 1640 std::reverse(Sequence.begin(), Sequence.end()); 1641 1642 #ifndef NDEBUG 1643 VerifyScheduledSequence(/*isBottomUp=*/true); 1644 #endif 1645 } 1646 1647 namespace { 1648 1649 class RegReductionPQBase; 1650 1651 struct queue_sort { 1652 bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } 1653 }; 1654 1655 #ifndef NDEBUG 1656 template<class SF> 1657 struct reverse_sort : public queue_sort { 1658 SF &SortFunc; 1659 1660 reverse_sort(SF &sf) : SortFunc(sf) {} 1661 1662 bool operator()(SUnit* left, SUnit* right) const { 1663 // reverse left/right rather than simply !SortFunc(left, right) 1664 // to expose different paths in the comparison logic. 1665 return SortFunc(right, left); 1666 } 1667 }; 1668 #endif // NDEBUG 1669 1670 /// bu_ls_rr_sort - Priority function for bottom up register pressure 1671 // reduction scheduler. 1672 struct bu_ls_rr_sort : public queue_sort { 1673 enum { 1674 IsBottomUp = true, 1675 HasReadyFilter = false 1676 }; 1677 1678 RegReductionPQBase *SPQ; 1679 1680 bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} 1681 1682 bool operator()(SUnit* left, SUnit* right) const; 1683 }; 1684 1685 // src_ls_rr_sort - Priority function for source order scheduler. 1686 struct src_ls_rr_sort : public queue_sort { 1687 enum { 1688 IsBottomUp = true, 1689 HasReadyFilter = false 1690 }; 1691 1692 RegReductionPQBase *SPQ; 1693 1694 src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} 1695 1696 bool operator()(SUnit* left, SUnit* right) const; 1697 }; 1698 1699 // hybrid_ls_rr_sort - Priority function for hybrid scheduler. 1700 struct hybrid_ls_rr_sort : public queue_sort { 1701 enum { 1702 IsBottomUp = true, 1703 HasReadyFilter = false 1704 }; 1705 1706 RegReductionPQBase *SPQ; 1707 1708 hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} 1709 1710 bool isReady(SUnit *SU, unsigned CurCycle) const; 1711 1712 bool operator()(SUnit* left, SUnit* right) const; 1713 }; 1714 1715 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) 1716 // scheduler. 1717 struct ilp_ls_rr_sort : public queue_sort { 1718 enum { 1719 IsBottomUp = true, 1720 HasReadyFilter = false 1721 }; 1722 1723 RegReductionPQBase *SPQ; 1724 1725 ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} 1726 1727 bool isReady(SUnit *SU, unsigned CurCycle) const; 1728 1729 bool operator()(SUnit* left, SUnit* right) const; 1730 }; 1731 1732 class RegReductionPQBase : public SchedulingPriorityQueue { 1733 protected: 1734 std::vector<SUnit *> Queue; 1735 unsigned CurQueueId = 0; 1736 bool TracksRegPressure; 1737 bool SrcOrder; 1738 1739 // SUnits - The SUnits for the current graph. 1740 std::vector<SUnit> *SUnits = nullptr; 1741 1742 MachineFunction &MF; 1743 const TargetInstrInfo *TII = nullptr; 1744 const TargetRegisterInfo *TRI = nullptr; 1745 const TargetLowering *TLI = nullptr; 1746 ScheduleDAGRRList *scheduleDAG = nullptr; 1747 1748 // SethiUllmanNumbers - The SethiUllman number for each node. 1749 std::vector<unsigned> SethiUllmanNumbers; 1750 1751 /// RegPressure - Tracking current reg pressure per register class. 1752 std::vector<unsigned> RegPressure; 1753 1754 /// RegLimit - Tracking the number of allocatable registers per register 1755 /// class. 1756 std::vector<unsigned> RegLimit; 1757 1758 public: 1759 RegReductionPQBase(MachineFunction &mf, 1760 bool hasReadyFilter, 1761 bool tracksrp, 1762 bool srcorder, 1763 const TargetInstrInfo *tii, 1764 const TargetRegisterInfo *tri, 1765 const TargetLowering *tli) 1766 : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp), 1767 SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) { 1768 if (TracksRegPressure) { 1769 unsigned NumRC = TRI->getNumRegClasses(); 1770 RegLimit.resize(NumRC); 1771 RegPressure.resize(NumRC); 1772 std::fill(RegLimit.begin(), RegLimit.end(), 0); 1773 std::fill(RegPressure.begin(), RegPressure.end(), 0); 1774 for (const TargetRegisterClass *RC : TRI->regclasses()) 1775 RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF); 1776 } 1777 } 1778 1779 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1780 scheduleDAG = scheduleDag; 1781 } 1782 1783 ScheduleHazardRecognizer* getHazardRec() { 1784 return scheduleDAG->getHazardRec(); 1785 } 1786 1787 void initNodes(std::vector<SUnit> &sunits) override; 1788 1789 void addNode(const SUnit *SU) override; 1790 1791 void updateNode(const SUnit *SU) override; 1792 1793 void releaseState() override { 1794 SUnits = nullptr; 1795 SethiUllmanNumbers.clear(); 1796 std::fill(RegPressure.begin(), RegPressure.end(), 0); 1797 } 1798 1799 unsigned getNodePriority(const SUnit *SU) const; 1800 1801 unsigned getNodeOrdering(const SUnit *SU) const { 1802 if (!SU->getNode()) return 0; 1803 1804 return SU->getNode()->getIROrder(); 1805 } 1806 1807 bool empty() const override { return Queue.empty(); } 1808 1809 void push(SUnit *U) override { 1810 assert(!U->NodeQueueId && "Node in the queue already"); 1811 U->NodeQueueId = ++CurQueueId; 1812 Queue.push_back(U); 1813 } 1814 1815 void remove(SUnit *SU) override { 1816 assert(!Queue.empty() && "Queue is empty!"); 1817 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1818 std::vector<SUnit *>::iterator I = llvm::find(Queue, SU); 1819 if (I != std::prev(Queue.end())) 1820 std::swap(*I, Queue.back()); 1821 Queue.pop_back(); 1822 SU->NodeQueueId = 0; 1823 } 1824 1825 bool tracksRegPressure() const override { return TracksRegPressure; } 1826 1827 void dumpRegPressure() const; 1828 1829 bool HighRegPressure(const SUnit *SU) const; 1830 1831 bool MayReduceRegPressure(SUnit *SU) const; 1832 1833 int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; 1834 1835 void scheduledNode(SUnit *SU) override; 1836 1837 void unscheduledNode(SUnit *SU) override; 1838 1839 protected: 1840 bool canClobber(const SUnit *SU, const SUnit *Op); 1841 void AddPseudoTwoAddrDeps(); 1842 void PrescheduleNodesWithMultipleUses(); 1843 void CalculateSethiUllmanNumbers(); 1844 }; 1845 1846 template<class SF> 1847 static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) { 1848 unsigned BestIdx = 0; 1849 // Only compute the cost for the first 1000 items in the queue, to avoid 1850 // excessive compile-times for very large queues. 1851 for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E; 1852 I++) 1853 if (Picker(Q[BestIdx], Q[I])) 1854 BestIdx = I; 1855 SUnit *V = Q[BestIdx]; 1856 if (BestIdx + 1 != Q.size()) 1857 std::swap(Q[BestIdx], Q.back()); 1858 Q.pop_back(); 1859 return V; 1860 } 1861 1862 template<class SF> 1863 SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) { 1864 #ifndef NDEBUG 1865 if (DAG->StressSched) { 1866 reverse_sort<SF> RPicker(Picker); 1867 return popFromQueueImpl(Q, RPicker); 1868 } 1869 #endif 1870 (void)DAG; 1871 return popFromQueueImpl(Q, Picker); 1872 } 1873 1874 //===----------------------------------------------------------------------===// 1875 // RegReductionPriorityQueue Definition 1876 //===----------------------------------------------------------------------===// 1877 // 1878 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 1879 // to reduce register pressure. 1880 // 1881 template<class SF> 1882 class RegReductionPriorityQueue : public RegReductionPQBase { 1883 SF Picker; 1884 1885 public: 1886 RegReductionPriorityQueue(MachineFunction &mf, 1887 bool tracksrp, 1888 bool srcorder, 1889 const TargetInstrInfo *tii, 1890 const TargetRegisterInfo *tri, 1891 const TargetLowering *tli) 1892 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, 1893 tii, tri, tli), 1894 Picker(this) {} 1895 1896 bool isBottomUp() const override { return SF::IsBottomUp; } 1897 1898 bool isReady(SUnit *U) const override { 1899 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); 1900 } 1901 1902 SUnit *pop() override { 1903 if (Queue.empty()) return nullptr; 1904 1905 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); 1906 V->NodeQueueId = 0; 1907 return V; 1908 } 1909 1910 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1911 LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override { 1912 // Emulate pop() without clobbering NodeQueueIds. 1913 std::vector<SUnit *> DumpQueue = Queue; 1914 SF DumpPicker = Picker; 1915 while (!DumpQueue.empty()) { 1916 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); 1917 dbgs() << "Height " << SU->getHeight() << ": "; 1918 DAG->dumpNode(*SU); 1919 } 1920 } 1921 #endif 1922 }; 1923 1924 using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>; 1925 using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>; 1926 using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>; 1927 using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>; 1928 1929 } // end anonymous namespace 1930 1931 //===----------------------------------------------------------------------===// 1932 // Static Node Priority for Register Pressure Reduction 1933 //===----------------------------------------------------------------------===// 1934 1935 // Check for special nodes that bypass scheduling heuristics. 1936 // Currently this pushes TokenFactor nodes down, but may be used for other 1937 // pseudo-ops as well. 1938 // 1939 // Return -1 to schedule right above left, 1 for left above right. 1940 // Return 0 if no bias exists. 1941 static int checkSpecialNodes(const SUnit *left, const SUnit *right) { 1942 bool LSchedLow = left->isScheduleLow; 1943 bool RSchedLow = right->isScheduleLow; 1944 if (LSchedLow != RSchedLow) 1945 return LSchedLow < RSchedLow ? 1 : -1; 1946 return 0; 1947 } 1948 1949 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 1950 /// Smaller number is the higher priority. 1951 static unsigned 1952 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 1953 if (SUNumbers[SU->NodeNum] != 0) 1954 return SUNumbers[SU->NodeNum]; 1955 1956 // Use WorkList to avoid stack overflow on excessively large IRs. 1957 struct WorkState { 1958 WorkState(const SUnit *SU) : SU(SU) {} 1959 const SUnit *SU; 1960 unsigned PredsProcessed = 0; 1961 }; 1962 1963 SmallVector<WorkState, 16> WorkList; 1964 WorkList.push_back(SU); 1965 while (!WorkList.empty()) { 1966 auto &Temp = WorkList.back(); 1967 auto *TempSU = Temp.SU; 1968 bool AllPredsKnown = true; 1969 // Try to find a non-evaluated pred and push it into the processing stack. 1970 for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) { 1971 auto &Pred = TempSU->Preds[P]; 1972 if (Pred.isCtrl()) continue; // ignore chain preds 1973 SUnit *PredSU = Pred.getSUnit(); 1974 if (SUNumbers[PredSU->NodeNum] == 0) { 1975 #ifndef NDEBUG 1976 // In debug mode, check that we don't have such element in the stack. 1977 for (auto It : WorkList) 1978 assert(It.SU != PredSU && "Trying to push an element twice?"); 1979 #endif 1980 // Next time start processing this one starting from the next pred. 1981 Temp.PredsProcessed = P + 1; 1982 WorkList.push_back(PredSU); 1983 AllPredsKnown = false; 1984 break; 1985 } 1986 } 1987 1988 if (!AllPredsKnown) 1989 continue; 1990 1991 // Once all preds are known, we can calculate the answer for this one. 1992 unsigned SethiUllmanNumber = 0; 1993 unsigned Extra = 0; 1994 for (const SDep &Pred : TempSU->Preds) { 1995 if (Pred.isCtrl()) continue; // ignore chain preds 1996 SUnit *PredSU = Pred.getSUnit(); 1997 unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum]; 1998 assert(PredSethiUllman > 0 && "We should have evaluated this pred!"); 1999 if (PredSethiUllman > SethiUllmanNumber) { 2000 SethiUllmanNumber = PredSethiUllman; 2001 Extra = 0; 2002 } else if (PredSethiUllman == SethiUllmanNumber) 2003 ++Extra; 2004 } 2005 2006 SethiUllmanNumber += Extra; 2007 if (SethiUllmanNumber == 0) 2008 SethiUllmanNumber = 1; 2009 SUNumbers[TempSU->NodeNum] = SethiUllmanNumber; 2010 WorkList.pop_back(); 2011 } 2012 2013 assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!"); 2014 return SUNumbers[SU->NodeNum]; 2015 } 2016 2017 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 2018 /// scheduling units. 2019 void RegReductionPQBase::CalculateSethiUllmanNumbers() { 2020 SethiUllmanNumbers.assign(SUnits->size(), 0); 2021 2022 for (const SUnit &SU : *SUnits) 2023 CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers); 2024 } 2025 2026 void RegReductionPQBase::addNode(const SUnit *SU) { 2027 unsigned SUSize = SethiUllmanNumbers.size(); 2028 if (SUnits->size() > SUSize) 2029 SethiUllmanNumbers.resize(SUSize*2, 0); 2030 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 2031 } 2032 2033 void RegReductionPQBase::updateNode(const SUnit *SU) { 2034 SethiUllmanNumbers[SU->NodeNum] = 0; 2035 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 2036 } 2037 2038 // Lower priority means schedule further down. For bottom-up scheduling, lower 2039 // priority SUs are scheduled before higher priority SUs. 2040 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { 2041 assert(SU->NodeNum < SethiUllmanNumbers.size()); 2042 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 2043 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 2044 // CopyToReg should be close to its uses to facilitate coalescing and 2045 // avoid spilling. 2046 return 0; 2047 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2048 Opc == TargetOpcode::SUBREG_TO_REG || 2049 Opc == TargetOpcode::INSERT_SUBREG) 2050 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 2051 // close to their uses to facilitate coalescing. 2052 return 0; 2053 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 2054 // If SU does not have a register use, i.e. it doesn't produce a value 2055 // that would be consumed (e.g. store), then it terminates a chain of 2056 // computation. Give it a large SethiUllman number so it will be 2057 // scheduled right before its predecessors that it doesn't lengthen 2058 // their live ranges. 2059 return 0xffff; 2060 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 2061 // If SU does not have a register def, schedule it close to its uses 2062 // because it does not lengthen any live ranges. 2063 return 0; 2064 #if 1 2065 return SethiUllmanNumbers[SU->NodeNum]; 2066 #else 2067 unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; 2068 if (SU->isCallOp) { 2069 // FIXME: This assumes all of the defs are used as call operands. 2070 int NP = (int)Priority - SU->getNode()->getNumValues(); 2071 return (NP > 0) ? NP : 0; 2072 } 2073 return Priority; 2074 #endif 2075 } 2076 2077 //===----------------------------------------------------------------------===// 2078 // Register Pressure Tracking 2079 //===----------------------------------------------------------------------===// 2080 2081 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2082 LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const { 2083 for (const TargetRegisterClass *RC : TRI->regclasses()) { 2084 unsigned Id = RC->getID(); 2085 unsigned RP = RegPressure[Id]; 2086 if (!RP) continue; 2087 LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " 2088 << RegLimit[Id] << '\n'); 2089 } 2090 } 2091 #endif 2092 2093 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { 2094 if (!TLI) 2095 return false; 2096 2097 for (const SDep &Pred : SU->Preds) { 2098 if (Pred.isCtrl()) 2099 continue; 2100 SUnit *PredSU = Pred.getSUnit(); 2101 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 2102 // to cover the number of registers defined (they are all live). 2103 if (PredSU->NumRegDefsLeft == 0) { 2104 continue; 2105 } 2106 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 2107 RegDefPos.IsValid(); RegDefPos.Advance()) { 2108 unsigned RCId, Cost; 2109 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 2110 2111 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) 2112 return true; 2113 } 2114 } 2115 return false; 2116 } 2117 2118 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { 2119 const SDNode *N = SU->getNode(); 2120 2121 if (!N->isMachineOpcode() || !SU->NumSuccs) 2122 return false; 2123 2124 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2125 for (unsigned i = 0; i != NumDefs; ++i) { 2126 MVT VT = N->getSimpleValueType(i); 2127 if (!N->hasAnyUseOfValue(i)) 2128 continue; 2129 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2130 if (RegPressure[RCId] >= RegLimit[RCId]) 2131 return true; 2132 } 2133 return false; 2134 } 2135 2136 // Compute the register pressure contribution by this instruction by count up 2137 // for uses that are not live and down for defs. Only count register classes 2138 // that are already under high pressure. As a side effect, compute the number of 2139 // uses of registers that are already live. 2140 // 2141 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure 2142 // so could probably be factored. 2143 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { 2144 LiveUses = 0; 2145 int PDiff = 0; 2146 for (const SDep &Pred : SU->Preds) { 2147 if (Pred.isCtrl()) 2148 continue; 2149 SUnit *PredSU = Pred.getSUnit(); 2150 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 2151 // to cover the number of registers defined (they are all live). 2152 if (PredSU->NumRegDefsLeft == 0) { 2153 if (PredSU->getNode()->isMachineOpcode()) 2154 ++LiveUses; 2155 continue; 2156 } 2157 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 2158 RegDefPos.IsValid(); RegDefPos.Advance()) { 2159 MVT VT = RegDefPos.GetValue(); 2160 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2161 if (RegPressure[RCId] >= RegLimit[RCId]) 2162 ++PDiff; 2163 } 2164 } 2165 const SDNode *N = SU->getNode(); 2166 2167 if (!N || !N->isMachineOpcode() || !SU->NumSuccs) 2168 return PDiff; 2169 2170 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2171 for (unsigned i = 0; i != NumDefs; ++i) { 2172 MVT VT = N->getSimpleValueType(i); 2173 if (!N->hasAnyUseOfValue(i)) 2174 continue; 2175 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2176 if (RegPressure[RCId] >= RegLimit[RCId]) 2177 --PDiff; 2178 } 2179 return PDiff; 2180 } 2181 2182 void RegReductionPQBase::scheduledNode(SUnit *SU) { 2183 if (!TracksRegPressure) 2184 return; 2185 2186 if (!SU->getNode()) 2187 return; 2188 2189 for (const SDep &Pred : SU->Preds) { 2190 if (Pred.isCtrl()) 2191 continue; 2192 SUnit *PredSU = Pred.getSUnit(); 2193 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 2194 // to cover the number of registers defined (they are all live). 2195 if (PredSU->NumRegDefsLeft == 0) { 2196 continue; 2197 } 2198 // FIXME: The ScheduleDAG currently loses information about which of a 2199 // node's values is consumed by each dependence. Consequently, if the node 2200 // defines multiple register classes, we don't know which to pressurize 2201 // here. Instead the following loop consumes the register defs in an 2202 // arbitrary order. At least it handles the common case of clustered loads 2203 // to the same class. For precise liveness, each SDep needs to indicate the 2204 // result number. But that tightly couples the ScheduleDAG with the 2205 // SelectionDAG making updates tricky. A simpler hack would be to attach a 2206 // value type or register class to SDep. 2207 // 2208 // The most important aspect of register tracking is balancing the increase 2209 // here with the reduction further below. Note that this SU may use multiple 2210 // defs in PredSU. The can't be determined here, but we've already 2211 // compensated by reducing NumRegDefsLeft in PredSU during 2212 // ScheduleDAGSDNodes::AddSchedEdges. 2213 --PredSU->NumRegDefsLeft; 2214 unsigned SkipRegDefs = PredSU->NumRegDefsLeft; 2215 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 2216 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 2217 if (SkipRegDefs) 2218 continue; 2219 2220 unsigned RCId, Cost; 2221 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 2222 RegPressure[RCId] += Cost; 2223 break; 2224 } 2225 } 2226 2227 // We should have this assert, but there may be dead SDNodes that never 2228 // materialize as SUnits, so they don't appear to generate liveness. 2229 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); 2230 int SkipRegDefs = (int)SU->NumRegDefsLeft; 2231 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); 2232 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 2233 if (SkipRegDefs > 0) 2234 continue; 2235 unsigned RCId, Cost; 2236 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 2237 if (RegPressure[RCId] < Cost) { 2238 // Register pressure tracking is imprecise. This can happen. But we try 2239 // hard not to let it happen because it likely results in poor scheduling. 2240 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum 2241 << ") has too many regdefs\n"); 2242 RegPressure[RCId] = 0; 2243 } 2244 else { 2245 RegPressure[RCId] -= Cost; 2246 } 2247 } 2248 LLVM_DEBUG(dumpRegPressure()); 2249 } 2250 2251 void RegReductionPQBase::unscheduledNode(SUnit *SU) { 2252 if (!TracksRegPressure) 2253 return; 2254 2255 const SDNode *N = SU->getNode(); 2256 if (!N) return; 2257 2258 if (!N->isMachineOpcode()) { 2259 if (N->getOpcode() != ISD::CopyToReg) 2260 return; 2261 } else { 2262 unsigned Opc = N->getMachineOpcode(); 2263 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2264 Opc == TargetOpcode::INSERT_SUBREG || 2265 Opc == TargetOpcode::SUBREG_TO_REG || 2266 Opc == TargetOpcode::REG_SEQUENCE || 2267 Opc == TargetOpcode::IMPLICIT_DEF) 2268 return; 2269 } 2270 2271 for (const SDep &Pred : SU->Preds) { 2272 if (Pred.isCtrl()) 2273 continue; 2274 SUnit *PredSU = Pred.getSUnit(); 2275 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only 2276 // counts data deps. 2277 if (PredSU->NumSuccsLeft != PredSU->Succs.size()) 2278 continue; 2279 const SDNode *PN = PredSU->getNode(); 2280 if (!PN->isMachineOpcode()) { 2281 if (PN->getOpcode() == ISD::CopyFromReg) { 2282 MVT VT = PN->getSimpleValueType(0); 2283 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2284 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2285 } 2286 continue; 2287 } 2288 unsigned POpc = PN->getMachineOpcode(); 2289 if (POpc == TargetOpcode::IMPLICIT_DEF) 2290 continue; 2291 if (POpc == TargetOpcode::EXTRACT_SUBREG || 2292 POpc == TargetOpcode::INSERT_SUBREG || 2293 POpc == TargetOpcode::SUBREG_TO_REG) { 2294 MVT VT = PN->getSimpleValueType(0); 2295 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2296 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2297 continue; 2298 } 2299 if (POpc == TargetOpcode::REG_SEQUENCE) { 2300 unsigned DstRCIdx = PN->getConstantOperandVal(0); 2301 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); 2302 unsigned RCId = RC->getID(); 2303 // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used 2304 // here. Instead use the same constant as in GetCostForDef. 2305 RegPressure[RCId] += RegSequenceCost; 2306 continue; 2307 } 2308 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); 2309 for (unsigned i = 0; i != NumDefs; ++i) { 2310 MVT VT = PN->getSimpleValueType(i); 2311 if (!PN->hasAnyUseOfValue(i)) 2312 continue; 2313 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2314 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) 2315 // Register pressure tracking is imprecise. This can happen. 2316 RegPressure[RCId] = 0; 2317 else 2318 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 2319 } 2320 } 2321 2322 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() 2323 // may transfer data dependencies to CopyToReg. 2324 if (SU->NumSuccs && N->isMachineOpcode()) { 2325 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2326 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 2327 MVT VT = N->getSimpleValueType(i); 2328 if (VT == MVT::Glue || VT == MVT::Other) 2329 continue; 2330 if (!N->hasAnyUseOfValue(i)) 2331 continue; 2332 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2333 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2334 } 2335 } 2336 2337 LLVM_DEBUG(dumpRegPressure()); 2338 } 2339 2340 //===----------------------------------------------------------------------===// 2341 // Dynamic Node Priority for Register Pressure Reduction 2342 //===----------------------------------------------------------------------===// 2343 2344 /// closestSucc - Returns the scheduled cycle of the successor which is 2345 /// closest to the current cycle. 2346 static unsigned closestSucc(const SUnit *SU) { 2347 unsigned MaxHeight = 0; 2348 for (const SDep &Succ : SU->Succs) { 2349 if (Succ.isCtrl()) continue; // ignore chain succs 2350 unsigned Height = Succ.getSUnit()->getHeight(); 2351 // If there are bunch of CopyToRegs stacked up, they should be considered 2352 // to be at the same position. 2353 if (Succ.getSUnit()->getNode() && 2354 Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 2355 Height = closestSucc(Succ.getSUnit())+1; 2356 if (Height > MaxHeight) 2357 MaxHeight = Height; 2358 } 2359 return MaxHeight; 2360 } 2361 2362 /// calcMaxScratches - Returns an cost estimate of the worse case requirement 2363 /// for scratch registers, i.e. number of data dependencies. 2364 static unsigned calcMaxScratches(const SUnit *SU) { 2365 unsigned Scratches = 0; 2366 for (const SDep &Pred : SU->Preds) { 2367 if (Pred.isCtrl()) continue; // ignore chain preds 2368 Scratches++; 2369 } 2370 return Scratches; 2371 } 2372 2373 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are 2374 /// CopyFromReg from a virtual register. 2375 static bool hasOnlyLiveInOpers(const SUnit *SU) { 2376 bool RetVal = false; 2377 for (const SDep &Pred : SU->Preds) { 2378 if (Pred.isCtrl()) continue; 2379 const SUnit *PredSU = Pred.getSUnit(); 2380 if (PredSU->getNode() && 2381 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { 2382 Register Reg = 2383 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); 2384 if (Reg.isVirtual()) { 2385 RetVal = true; 2386 continue; 2387 } 2388 } 2389 return false; 2390 } 2391 return RetVal; 2392 } 2393 2394 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are 2395 /// CopyToReg to a virtual register. This SU def is probably a liveout and 2396 /// it has no other use. It should be scheduled closer to the terminator. 2397 static bool hasOnlyLiveOutUses(const SUnit *SU) { 2398 bool RetVal = false; 2399 for (const SDep &Succ : SU->Succs) { 2400 if (Succ.isCtrl()) continue; 2401 const SUnit *SuccSU = Succ.getSUnit(); 2402 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { 2403 Register Reg = 2404 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); 2405 if (Reg.isVirtual()) { 2406 RetVal = true; 2407 continue; 2408 } 2409 } 2410 return false; 2411 } 2412 return RetVal; 2413 } 2414 2415 // Set isVRegCycle for a node with only live in opers and live out uses. Also 2416 // set isVRegCycle for its CopyFromReg operands. 2417 // 2418 // This is only relevant for single-block loops, in which case the VRegCycle 2419 // node is likely an induction variable in which the operand and target virtual 2420 // registers should be coalesced (e.g. pre/post increment values). Setting the 2421 // isVRegCycle flag helps the scheduler prioritize other uses of the same 2422 // CopyFromReg so that this node becomes the virtual register "kill". This 2423 // avoids interference between the values live in and out of the block and 2424 // eliminates a copy inside the loop. 2425 static void initVRegCycle(SUnit *SU) { 2426 if (DisableSchedVRegCycle) 2427 return; 2428 2429 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) 2430 return; 2431 2432 LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); 2433 2434 SU->isVRegCycle = true; 2435 2436 for (const SDep &Pred : SU->Preds) { 2437 if (Pred.isCtrl()) continue; 2438 Pred.getSUnit()->isVRegCycle = true; 2439 } 2440 } 2441 2442 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of 2443 // CopyFromReg operands. We should no longer penalize other uses of this VReg. 2444 static void resetVRegCycle(SUnit *SU) { 2445 if (!SU->isVRegCycle) 2446 return; 2447 2448 for (const SDep &Pred : SU->Preds) { 2449 if (Pred.isCtrl()) continue; // ignore chain preds 2450 SUnit *PredSU = Pred.getSUnit(); 2451 if (PredSU->isVRegCycle) { 2452 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && 2453 "VRegCycle def must be CopyFromReg"); 2454 Pred.getSUnit()->isVRegCycle = false; 2455 } 2456 } 2457 } 2458 2459 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This 2460 // means a node that defines the VRegCycle has not been scheduled yet. 2461 static bool hasVRegCycleUse(const SUnit *SU) { 2462 // If this SU also defines the VReg, don't hoist it as a "use". 2463 if (SU->isVRegCycle) 2464 return false; 2465 2466 for (const SDep &Pred : SU->Preds) { 2467 if (Pred.isCtrl()) continue; // ignore chain preds 2468 if (Pred.getSUnit()->isVRegCycle && 2469 Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { 2470 LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); 2471 return true; 2472 } 2473 } 2474 return false; 2475 } 2476 2477 // Check for either a dependence (latency) or resource (hazard) stall. 2478 // 2479 // Note: The ScheduleHazardRecognizer interface requires a non-const SU. 2480 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { 2481 if ((int)SPQ->getCurCycle() < Height) return true; 2482 if (SPQ->getHazardRec()->getHazardType(SU, 0) 2483 != ScheduleHazardRecognizer::NoHazard) 2484 return true; 2485 return false; 2486 } 2487 2488 // Return -1 if left has higher priority, 1 if right has higher priority. 2489 // Return 0 if latency-based priority is equivalent. 2490 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, 2491 RegReductionPQBase *SPQ) { 2492 // Scheduling an instruction that uses a VReg whose postincrement has not yet 2493 // been scheduled will induce a copy. Model this as an extra cycle of latency. 2494 int LPenalty = hasVRegCycleUse(left) ? 1 : 0; 2495 int RPenalty = hasVRegCycleUse(right) ? 1 : 0; 2496 int LHeight = (int)left->getHeight() + LPenalty; 2497 int RHeight = (int)right->getHeight() + RPenalty; 2498 2499 bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && 2500 BUHasStall(left, LHeight, SPQ); 2501 bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && 2502 BUHasStall(right, RHeight, SPQ); 2503 2504 // If scheduling one of the node will cause a pipeline stall, delay it. 2505 // If scheduling either one of the node will cause a pipeline stall, sort 2506 // them according to their height. 2507 if (LStall) { 2508 if (!RStall) 2509 return 1; 2510 if (LHeight != RHeight) 2511 return LHeight > RHeight ? 1 : -1; 2512 } else if (RStall) 2513 return -1; 2514 2515 // If either node is scheduling for latency, sort them by height/depth 2516 // and latency. 2517 if (!checkPref || (left->SchedulingPref == Sched::ILP || 2518 right->SchedulingPref == Sched::ILP)) { 2519 // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer 2520 // is enabled, grouping instructions by cycle, then its height is already 2521 // covered so only its depth matters. We also reach this point if both stall 2522 // but have the same height. 2523 if (!SPQ->getHazardRec()->isEnabled()) { 2524 if (LHeight != RHeight) 2525 return LHeight > RHeight ? 1 : -1; 2526 } 2527 int LDepth = left->getDepth() - LPenalty; 2528 int RDepth = right->getDepth() - RPenalty; 2529 if (LDepth != RDepth) { 2530 LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum 2531 << ") depth " << LDepth << " vs SU (" << right->NodeNum 2532 << ") depth " << RDepth << "\n"); 2533 return LDepth < RDepth ? 1 : -1; 2534 } 2535 if (left->Latency != right->Latency) 2536 return left->Latency > right->Latency ? 1 : -1; 2537 } 2538 return 0; 2539 } 2540 2541 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { 2542 // Schedule physical register definitions close to their use. This is 2543 // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as 2544 // long as shortening physreg live ranges is generally good, we can defer 2545 // creating a subtarget hook. 2546 if (!DisableSchedPhysRegJoin) { 2547 bool LHasPhysReg = left->hasPhysRegDefs; 2548 bool RHasPhysReg = right->hasPhysRegDefs; 2549 if (LHasPhysReg != RHasPhysReg) { 2550 #ifndef NDEBUG 2551 static const char *const PhysRegMsg[] = { " has no physreg", 2552 " defines a physreg" }; 2553 #endif 2554 LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") " 2555 << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum 2556 << ") " << PhysRegMsg[RHasPhysReg] << "\n"); 2557 return LHasPhysReg < RHasPhysReg; 2558 } 2559 } 2560 2561 // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. 2562 unsigned LPriority = SPQ->getNodePriority(left); 2563 unsigned RPriority = SPQ->getNodePriority(right); 2564 2565 // Be really careful about hoisting call operands above previous calls. 2566 // Only allows it if it would reduce register pressure. 2567 if (left->isCall && right->isCallOp) { 2568 unsigned RNumVals = right->getNode()->getNumValues(); 2569 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0; 2570 } 2571 if (right->isCall && left->isCallOp) { 2572 unsigned LNumVals = left->getNode()->getNumValues(); 2573 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; 2574 } 2575 2576 if (LPriority != RPriority) 2577 return LPriority > RPriority; 2578 2579 // One or both of the nodes are calls and their sethi-ullman numbers are the 2580 // same, then keep source order. 2581 if (left->isCall || right->isCall) { 2582 unsigned LOrder = SPQ->getNodeOrdering(left); 2583 unsigned ROrder = SPQ->getNodeOrdering(right); 2584 2585 // Prefer an ordering where the lower the non-zero order number, the higher 2586 // the preference. 2587 if ((LOrder || ROrder) && LOrder != ROrder) 2588 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 2589 } 2590 2591 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 2592 // e.g. 2593 // t1 = op t2, c1 2594 // t3 = op t4, c2 2595 // 2596 // and the following instructions are both ready. 2597 // t2 = op c3 2598 // t4 = op c4 2599 // 2600 // Then schedule t2 = op first. 2601 // i.e. 2602 // t4 = op c4 2603 // t2 = op c3 2604 // t1 = op t2, c1 2605 // t3 = op t4, c2 2606 // 2607 // This creates more short live intervals. 2608 unsigned LDist = closestSucc(left); 2609 unsigned RDist = closestSucc(right); 2610 if (LDist != RDist) 2611 return LDist < RDist; 2612 2613 // How many registers becomes live when the node is scheduled. 2614 unsigned LScratch = calcMaxScratches(left); 2615 unsigned RScratch = calcMaxScratches(right); 2616 if (LScratch != RScratch) 2617 return LScratch > RScratch; 2618 2619 // Comparing latency against a call makes little sense unless the node 2620 // is register pressure-neutral. 2621 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) 2622 return (left->NodeQueueId > right->NodeQueueId); 2623 2624 // Do not compare latencies when one or both of the nodes are calls. 2625 if (!DisableSchedCycles && 2626 !(left->isCall || right->isCall)) { 2627 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); 2628 if (result != 0) 2629 return result > 0; 2630 } 2631 else { 2632 if (left->getHeight() != right->getHeight()) 2633 return left->getHeight() > right->getHeight(); 2634 2635 if (left->getDepth() != right->getDepth()) 2636 return left->getDepth() < right->getDepth(); 2637 } 2638 2639 assert(left->NodeQueueId && right->NodeQueueId && 2640 "NodeQueueId cannot be zero"); 2641 return (left->NodeQueueId > right->NodeQueueId); 2642 } 2643 2644 // Bottom up 2645 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2646 if (int res = checkSpecialNodes(left, right)) 2647 return res > 0; 2648 2649 return BURRSort(left, right, SPQ); 2650 } 2651 2652 // Source order, otherwise bottom up. 2653 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2654 if (int res = checkSpecialNodes(left, right)) 2655 return res > 0; 2656 2657 unsigned LOrder = SPQ->getNodeOrdering(left); 2658 unsigned ROrder = SPQ->getNodeOrdering(right); 2659 2660 // Prefer an ordering where the lower the non-zero order number, the higher 2661 // the preference. 2662 if ((LOrder || ROrder) && LOrder != ROrder) 2663 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 2664 2665 return BURRSort(left, right, SPQ); 2666 } 2667 2668 // If the time between now and when the instruction will be ready can cover 2669 // the spill code, then avoid adding it to the ready queue. This gives long 2670 // stalls highest priority and allows hoisting across calls. It should also 2671 // speed up processing the available queue. 2672 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2673 static const unsigned ReadyDelay = 3; 2674 2675 if (SPQ->MayReduceRegPressure(SU)) return true; 2676 2677 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; 2678 2679 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) 2680 != ScheduleHazardRecognizer::NoHazard) 2681 return false; 2682 2683 return true; 2684 } 2685 2686 // Return true if right should be scheduled with higher priority than left. 2687 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2688 if (int res = checkSpecialNodes(left, right)) 2689 return res > 0; 2690 2691 if (left->isCall || right->isCall) 2692 // No way to compute latency of calls. 2693 return BURRSort(left, right, SPQ); 2694 2695 bool LHigh = SPQ->HighRegPressure(left); 2696 bool RHigh = SPQ->HighRegPressure(right); 2697 // Avoid causing spills. If register pressure is high, schedule for 2698 // register pressure reduction. 2699 if (LHigh && !RHigh) { 2700 LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" 2701 << right->NodeNum << ")\n"); 2702 return true; 2703 } 2704 else if (!LHigh && RHigh) { 2705 LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" 2706 << left->NodeNum << ")\n"); 2707 return false; 2708 } 2709 if (!LHigh && !RHigh) { 2710 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); 2711 if (result != 0) 2712 return result > 0; 2713 } 2714 return BURRSort(left, right, SPQ); 2715 } 2716 2717 // Schedule as many instructions in each cycle as possible. So don't make an 2718 // instruction available unless it is ready in the current cycle. 2719 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2720 if (SU->getHeight() > CurCycle) return false; 2721 2722 if (SPQ->getHazardRec()->getHazardType(SU, 0) 2723 != ScheduleHazardRecognizer::NoHazard) 2724 return false; 2725 2726 return true; 2727 } 2728 2729 static bool canEnableCoalescing(SUnit *SU) { 2730 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 2731 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 2732 // CopyToReg should be close to its uses to facilitate coalescing and 2733 // avoid spilling. 2734 return true; 2735 2736 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2737 Opc == TargetOpcode::SUBREG_TO_REG || 2738 Opc == TargetOpcode::INSERT_SUBREG) 2739 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 2740 // close to their uses to facilitate coalescing. 2741 return true; 2742 2743 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 2744 // If SU does not have a register def, schedule it close to its uses 2745 // because it does not lengthen any live ranges. 2746 return true; 2747 2748 return false; 2749 } 2750 2751 // list-ilp is currently an experimental scheduler that allows various 2752 // heuristics to be enabled prior to the normal register reduction logic. 2753 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2754 if (int res = checkSpecialNodes(left, right)) 2755 return res > 0; 2756 2757 if (left->isCall || right->isCall) 2758 // No way to compute latency of calls. 2759 return BURRSort(left, right, SPQ); 2760 2761 unsigned LLiveUses = 0, RLiveUses = 0; 2762 int LPDiff = 0, RPDiff = 0; 2763 if (!DisableSchedRegPressure || !DisableSchedLiveUses) { 2764 LPDiff = SPQ->RegPressureDiff(left, LLiveUses); 2765 RPDiff = SPQ->RegPressureDiff(right, RLiveUses); 2766 } 2767 if (!DisableSchedRegPressure && LPDiff != RPDiff) { 2768 LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum 2769 << "): " << LPDiff << " != SU(" << right->NodeNum 2770 << "): " << RPDiff << "\n"); 2771 return LPDiff > RPDiff; 2772 } 2773 2774 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { 2775 bool LReduce = canEnableCoalescing(left); 2776 bool RReduce = canEnableCoalescing(right); 2777 if (LReduce && !RReduce) return false; 2778 if (RReduce && !LReduce) return true; 2779 } 2780 2781 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { 2782 LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses 2783 << " != SU(" << right->NodeNum << "): " << RLiveUses 2784 << "\n"); 2785 return LLiveUses < RLiveUses; 2786 } 2787 2788 if (!DisableSchedStalls) { 2789 bool LStall = BUHasStall(left, left->getHeight(), SPQ); 2790 bool RStall = BUHasStall(right, right->getHeight(), SPQ); 2791 if (LStall != RStall) 2792 return left->getHeight() > right->getHeight(); 2793 } 2794 2795 if (!DisableSchedCriticalPath) { 2796 int spread = (int)left->getDepth() - (int)right->getDepth(); 2797 if (std::abs(spread) > MaxReorderWindow) { 2798 LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " 2799 << left->getDepth() << " != SU(" << right->NodeNum 2800 << "): " << right->getDepth() << "\n"); 2801 return left->getDepth() < right->getDepth(); 2802 } 2803 } 2804 2805 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { 2806 int spread = (int)left->getHeight() - (int)right->getHeight(); 2807 if (std::abs(spread) > MaxReorderWindow) 2808 return left->getHeight() > right->getHeight(); 2809 } 2810 2811 return BURRSort(left, right, SPQ); 2812 } 2813 2814 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { 2815 SUnits = &sunits; 2816 // Add pseudo dependency edges for two-address nodes. 2817 if (!Disable2AddrHack) 2818 AddPseudoTwoAddrDeps(); 2819 // Reroute edges to nodes with multiple uses. 2820 if (!TracksRegPressure && !SrcOrder) 2821 PrescheduleNodesWithMultipleUses(); 2822 // Calculate node priorities. 2823 CalculateSethiUllmanNumbers(); 2824 2825 // For single block loops, mark nodes that look like canonical IV increments. 2826 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) 2827 for (SUnit &SU : sunits) 2828 initVRegCycle(&SU); 2829 } 2830 2831 //===----------------------------------------------------------------------===// 2832 // Preschedule for Register Pressure 2833 //===----------------------------------------------------------------------===// 2834 2835 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { 2836 if (SU->isTwoAddress) { 2837 unsigned Opc = SU->getNode()->getMachineOpcode(); 2838 const MCInstrDesc &MCID = TII->get(Opc); 2839 unsigned NumRes = MCID.getNumDefs(); 2840 unsigned NumOps = MCID.getNumOperands() - NumRes; 2841 for (unsigned i = 0; i != NumOps; ++i) { 2842 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { 2843 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 2844 if (DU->getNodeId() != -1 && 2845 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 2846 return true; 2847 } 2848 } 2849 } 2850 return false; 2851 } 2852 2853 /// canClobberReachingPhysRegUse - True if SU would clobber one of it's 2854 /// successor's explicit physregs whose definition can reach DepSU. 2855 /// i.e. DepSU should not be scheduled above SU. 2856 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, 2857 ScheduleDAGRRList *scheduleDAG, 2858 const TargetInstrInfo *TII, 2859 const TargetRegisterInfo *TRI) { 2860 ArrayRef<MCPhysReg> ImpDefs = 2861 TII->get(SU->getNode()->getMachineOpcode()).implicit_defs(); 2862 const uint32_t *RegMask = getNodeRegMask(SU->getNode()); 2863 if (ImpDefs.empty() && !RegMask) 2864 return false; 2865 2866 for (const SDep &Succ : SU->Succs) { 2867 SUnit *SuccSU = Succ.getSUnit(); 2868 for (const SDep &SuccPred : SuccSU->Preds) { 2869 if (!SuccPred.isAssignedRegDep()) 2870 continue; 2871 2872 if (RegMask && 2873 MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) && 2874 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) 2875 return true; 2876 2877 for (MCPhysReg ImpDef : ImpDefs) { 2878 // Return true if SU clobbers this physical register use and the 2879 // definition of the register reaches from DepSU. IsReachable queries 2880 // a topological forward sort of the DAG (following the successors). 2881 if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) && 2882 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) 2883 return true; 2884 } 2885 } 2886 } 2887 return false; 2888 } 2889 2890 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 2891 /// physical register defs. 2892 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 2893 const TargetInstrInfo *TII, 2894 const TargetRegisterInfo *TRI) { 2895 SDNode *N = SuccSU->getNode(); 2896 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2897 ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs(); 2898 assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs"); 2899 for (const SDNode *SUNode = SU->getNode(); SUNode; 2900 SUNode = SUNode->getGluedNode()) { 2901 if (!SUNode->isMachineOpcode()) 2902 continue; 2903 ArrayRef<MCPhysReg> SUImpDefs = 2904 TII->get(SUNode->getMachineOpcode()).implicit_defs(); 2905 const uint32_t *SURegMask = getNodeRegMask(SUNode); 2906 if (SUImpDefs.empty() && !SURegMask) 2907 continue; 2908 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 2909 MVT VT = N->getSimpleValueType(i); 2910 if (VT == MVT::Glue || VT == MVT::Other) 2911 continue; 2912 if (!N->hasAnyUseOfValue(i)) 2913 continue; 2914 MCPhysReg Reg = ImpDefs[i - NumDefs]; 2915 if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) 2916 return true; 2917 for (MCPhysReg SUReg : SUImpDefs) { 2918 if (TRI->regsOverlap(Reg, SUReg)) 2919 return true; 2920 } 2921 } 2922 } 2923 return false; 2924 } 2925 2926 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 2927 /// are not handled well by the general register pressure reduction 2928 /// heuristics. When presented with code like this: 2929 /// 2930 /// N 2931 /// / | 2932 /// / | 2933 /// U store 2934 /// | 2935 /// ... 2936 /// 2937 /// the heuristics tend to push the store up, but since the 2938 /// operand of the store has another use (U), this would increase 2939 /// the length of that other use (the U->N edge). 2940 /// 2941 /// This function transforms code like the above to route U's 2942 /// dependence through the store when possible, like this: 2943 /// 2944 /// N 2945 /// || 2946 /// || 2947 /// store 2948 /// | 2949 /// U 2950 /// | 2951 /// ... 2952 /// 2953 /// This results in the store being scheduled immediately 2954 /// after N, which shortens the U->N live range, reducing 2955 /// register pressure. 2956 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { 2957 // Visit all the nodes in topological order, working top-down. 2958 for (SUnit &SU : *SUnits) { 2959 // For now, only look at nodes with no data successors, such as stores. 2960 // These are especially important, due to the heuristics in 2961 // getNodePriority for nodes with no data successors. 2962 if (SU.NumSuccs != 0) 2963 continue; 2964 // For now, only look at nodes with exactly one data predecessor. 2965 if (SU.NumPreds != 1) 2966 continue; 2967 // Avoid prescheduling copies to virtual registers, which don't behave 2968 // like other nodes from the perspective of scheduling heuristics. 2969 if (SDNode *N = SU.getNode()) 2970 if (N->getOpcode() == ISD::CopyToReg && 2971 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual()) 2972 continue; 2973 2974 SDNode *PredFrameSetup = nullptr; 2975 for (const SDep &Pred : SU.Preds) 2976 if (Pred.isCtrl() && Pred.getSUnit()) { 2977 // Find the predecessor which is not data dependence. 2978 SDNode *PredND = Pred.getSUnit()->getNode(); 2979 2980 // If PredND is FrameSetup, we should not pre-scheduled the node, 2981 // or else, when bottom up scheduling, ADJCALLSTACKDOWN and 2982 // ADJCALLSTACKUP may hold CallResource too long and make other 2983 // calls can't be scheduled. If there's no other available node 2984 // to schedule, the schedular will try to rename the register by 2985 // creating copy to avoid the conflict which will fail because 2986 // CallResource is not a real physical register. 2987 if (PredND && PredND->isMachineOpcode() && 2988 (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) { 2989 PredFrameSetup = PredND; 2990 break; 2991 } 2992 } 2993 // Skip the node has FrameSetup parent. 2994 if (PredFrameSetup != nullptr) 2995 continue; 2996 2997 // Locate the single data predecessor. 2998 SUnit *PredSU = nullptr; 2999 for (const SDep &Pred : SU.Preds) 3000 if (!Pred.isCtrl()) { 3001 PredSU = Pred.getSUnit(); 3002 break; 3003 } 3004 assert(PredSU); 3005 3006 // Don't rewrite edges that carry physregs, because that requires additional 3007 // support infrastructure. 3008 if (PredSU->hasPhysRegDefs) 3009 continue; 3010 // Short-circuit the case where SU is PredSU's only data successor. 3011 if (PredSU->NumSuccs == 1) 3012 continue; 3013 // Avoid prescheduling to copies from virtual registers, which don't behave 3014 // like other nodes from the perspective of scheduling heuristics. 3015 if (SDNode *N = SU.getNode()) 3016 if (N->getOpcode() == ISD::CopyFromReg && 3017 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual()) 3018 continue; 3019 3020 // Perform checks on the successors of PredSU. 3021 for (const SDep &PredSucc : PredSU->Succs) { 3022 SUnit *PredSuccSU = PredSucc.getSUnit(); 3023 if (PredSuccSU == &SU) continue; 3024 // If PredSU has another successor with no data successors, for 3025 // now don't attempt to choose either over the other. 3026 if (PredSuccSU->NumSuccs == 0) 3027 goto outer_loop_continue; 3028 // Don't break physical register dependencies. 3029 if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) 3030 if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI)) 3031 goto outer_loop_continue; 3032 // Don't introduce graph cycles. 3033 if (scheduleDAG->IsReachable(&SU, PredSuccSU)) 3034 goto outer_loop_continue; 3035 } 3036 3037 // Ok, the transformation is safe and the heuristics suggest it is 3038 // profitable. Update the graph. 3039 LLVM_DEBUG( 3040 dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #" 3041 << PredSU->NodeNum 3042 << " to guide scheduling in the presence of multiple uses\n"); 3043 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { 3044 SDep Edge = PredSU->Succs[i]; 3045 assert(!Edge.isAssignedRegDep()); 3046 SUnit *SuccSU = Edge.getSUnit(); 3047 if (SuccSU != &SU) { 3048 Edge.setSUnit(PredSU); 3049 scheduleDAG->RemovePred(SuccSU, Edge); 3050 scheduleDAG->AddPredQueued(&SU, Edge); 3051 Edge.setSUnit(&SU); 3052 scheduleDAG->AddPredQueued(SuccSU, Edge); 3053 --i; 3054 } 3055 } 3056 outer_loop_continue:; 3057 } 3058 } 3059 3060 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 3061 /// it as a def&use operand. Add a pseudo control edge from it to the other 3062 /// node (if it won't create a cycle) so the two-address one will be scheduled 3063 /// first (lower in the schedule). If both nodes are two-address, favor the 3064 /// one that has a CopyToReg use (more likely to be a loop induction update). 3065 /// If both are two-address, but one is commutable while the other is not 3066 /// commutable, favor the one that's not commutable. 3067 void RegReductionPQBase::AddPseudoTwoAddrDeps() { 3068 for (SUnit &SU : *SUnits) { 3069 if (!SU.isTwoAddress) 3070 continue; 3071 3072 SDNode *Node = SU.getNode(); 3073 if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode()) 3074 continue; 3075 3076 bool isLiveOut = hasOnlyLiveOutUses(&SU); 3077 unsigned Opc = Node->getMachineOpcode(); 3078 const MCInstrDesc &MCID = TII->get(Opc); 3079 unsigned NumRes = MCID.getNumDefs(); 3080 unsigned NumOps = MCID.getNumOperands() - NumRes; 3081 for (unsigned j = 0; j != NumOps; ++j) { 3082 if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1) 3083 continue; 3084 SDNode *DU = SU.getNode()->getOperand(j).getNode(); 3085 if (DU->getNodeId() == -1) 3086 continue; 3087 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 3088 if (!DUSU) 3089 continue; 3090 for (const SDep &Succ : DUSU->Succs) { 3091 if (Succ.isCtrl()) 3092 continue; 3093 SUnit *SuccSU = Succ.getSUnit(); 3094 if (SuccSU == &SU) 3095 continue; 3096 // Be conservative. Ignore if nodes aren't at roughly the same 3097 // depth and height. 3098 if (SuccSU->getHeight() < SU.getHeight() && 3099 (SU.getHeight() - SuccSU->getHeight()) > 1) 3100 continue; 3101 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge 3102 // constrains whatever is using the copy, instead of the copy 3103 // itself. In the case that the copy is coalesced, this 3104 // preserves the intent of the pseudo two-address heurietics. 3105 while (SuccSU->Succs.size() == 1 && 3106 SuccSU->getNode()->isMachineOpcode() && 3107 SuccSU->getNode()->getMachineOpcode() == 3108 TargetOpcode::COPY_TO_REGCLASS) 3109 SuccSU = SuccSU->Succs.front().getSUnit(); 3110 // Don't constrain non-instruction nodes. 3111 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 3112 continue; 3113 // Don't constrain nodes with physical register defs if the 3114 // predecessor can clobber them. 3115 if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) { 3116 if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI)) 3117 continue; 3118 } 3119 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; 3120 // these may be coalesced away. We want them close to their uses. 3121 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 3122 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || 3123 SuccOpc == TargetOpcode::INSERT_SUBREG || 3124 SuccOpc == TargetOpcode::SUBREG_TO_REG) 3125 continue; 3126 if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) && 3127 (!canClobber(SuccSU, DUSU) || 3128 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || 3129 (!SU.isCommutable && SuccSU->isCommutable)) && 3130 !scheduleDAG->IsReachable(SuccSU, &SU)) { 3131 LLVM_DEBUG(dbgs() 3132 << " Adding a pseudo-two-addr edge from SU #" 3133 << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); 3134 scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial)); 3135 } 3136 } 3137 } 3138 } 3139 } 3140 3141 //===----------------------------------------------------------------------===// 3142 // Public Constructor Functions 3143 //===----------------------------------------------------------------------===// 3144 3145 ScheduleDAGSDNodes *llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 3146 CodeGenOptLevel OptLevel) { 3147 const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); 3148 const TargetInstrInfo *TII = STI.getInstrInfo(); 3149 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 3150 3151 BURegReductionPriorityQueue *PQ = 3152 new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr); 3153 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 3154 PQ->setScheduleDAG(SD); 3155 return SD; 3156 } 3157 3158 ScheduleDAGSDNodes * 3159 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, 3160 CodeGenOptLevel OptLevel) { 3161 const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); 3162 const TargetInstrInfo *TII = STI.getInstrInfo(); 3163 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 3164 3165 SrcRegReductionPriorityQueue *PQ = 3166 new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr); 3167 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 3168 PQ->setScheduleDAG(SD); 3169 return SD; 3170 } 3171 3172 ScheduleDAGSDNodes * 3173 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, 3174 CodeGenOptLevel OptLevel) { 3175 const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); 3176 const TargetInstrInfo *TII = STI.getInstrInfo(); 3177 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 3178 const TargetLowering *TLI = IS->TLI; 3179 3180 HybridBURRPriorityQueue *PQ = 3181 new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); 3182 3183 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 3184 PQ->setScheduleDAG(SD); 3185 return SD; 3186 } 3187 3188 ScheduleDAGSDNodes *llvm::createILPListDAGScheduler(SelectionDAGISel *IS, 3189 CodeGenOptLevel OptLevel) { 3190 const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); 3191 const TargetInstrInfo *TII = STI.getInstrInfo(); 3192 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 3193 const TargetLowering *TLI = IS->TLI; 3194 3195 ILPBURRPriorityQueue *PQ = 3196 new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); 3197 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 3198 PQ->setScheduleDAG(SD); 3199 return SD; 3200 } 3201