1 //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This implements bottom-up and top-down register pressure reduction list 10 // schedulers, using standard algorithms. The basic approach uses a priority 11 // queue of available nodes to schedule. One at a time, nodes are taken from 12 // the priority queue (thus in priority order), checked for legality to 13 // schedule, and emitted if legal. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "ScheduleDAGSDNodes.h" 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/CodeGen/ISDOpcodes.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineOperand.h" 27 #include "llvm/CodeGen/MachineValueType.h" 28 #include "llvm/CodeGen/Register.h" 29 #include "llvm/CodeGen/ScheduleDAG.h" 30 #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 31 #include "llvm/CodeGen/SchedulerRegistry.h" 32 #include "llvm/CodeGen/SelectionDAGISel.h" 33 #include "llvm/CodeGen/SelectionDAGNodes.h" 34 #include "llvm/CodeGen/TargetInstrInfo.h" 35 #include "llvm/CodeGen/TargetLowering.h" 36 #include "llvm/CodeGen/TargetOpcodes.h" 37 #include "llvm/CodeGen/TargetRegisterInfo.h" 38 #include "llvm/CodeGen/TargetSubtargetInfo.h" 39 #include "llvm/Config/llvm-config.h" 40 #include "llvm/IR/InlineAsm.h" 41 #include "llvm/MC/MCInstrDesc.h" 42 #include "llvm/MC/MCRegisterInfo.h" 43 #include "llvm/Support/Casting.h" 44 #include "llvm/Support/CodeGen.h" 45 #include "llvm/Support/CommandLine.h" 46 #include "llvm/Support/Compiler.h" 47 #include "llvm/Support/Debug.h" 48 #include "llvm/Support/ErrorHandling.h" 49 #include "llvm/Support/raw_ostream.h" 50 #include <algorithm> 51 #include <cassert> 52 #include <cstdint> 53 #include <cstdlib> 54 #include <iterator> 55 #include <limits> 56 #include <memory> 57 #include <utility> 58 #include <vector> 59 60 using namespace llvm; 61 62 #define DEBUG_TYPE "pre-RA-sched" 63 64 STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 65 STATISTIC(NumUnfolds, "Number of nodes unfolded"); 66 STATISTIC(NumDups, "Number of duplicated nodes"); 67 STATISTIC(NumPRCopies, "Number of physical register copies"); 68 69 static RegisterScheduler 70 burrListDAGScheduler("list-burr", 71 "Bottom-up register reduction list scheduling", 72 createBURRListDAGScheduler); 73 74 static RegisterScheduler 75 sourceListDAGScheduler("source", 76 "Similar to list-burr but schedules in source " 77 "order when possible", 78 createSourceListDAGScheduler); 79 80 static RegisterScheduler 81 hybridListDAGScheduler("list-hybrid", 82 "Bottom-up register pressure aware list scheduling " 83 "which tries to balance latency and register pressure", 84 createHybridListDAGScheduler); 85 86 static RegisterScheduler 87 ILPListDAGScheduler("list-ilp", 88 "Bottom-up register pressure aware list scheduling " 89 "which tries to balance ILP and register pressure", 90 createILPListDAGScheduler); 91 92 static cl::opt<bool> DisableSchedCycles( 93 "disable-sched-cycles", cl::Hidden, cl::init(false), 94 cl::desc("Disable cycle-level precision during preRA scheduling")); 95 96 // Temporary sched=list-ilp flags until the heuristics are robust. 97 // Some options are also available under sched=list-hybrid. 98 static cl::opt<bool> DisableSchedRegPressure( 99 "disable-sched-reg-pressure", cl::Hidden, cl::init(false), 100 cl::desc("Disable regpressure priority in sched=list-ilp")); 101 static cl::opt<bool> DisableSchedLiveUses( 102 "disable-sched-live-uses", cl::Hidden, cl::init(true), 103 cl::desc("Disable live use priority in sched=list-ilp")); 104 static cl::opt<bool> DisableSchedVRegCycle( 105 "disable-sched-vrcycle", cl::Hidden, cl::init(false), 106 cl::desc("Disable virtual register cycle interference checks")); 107 static cl::opt<bool> DisableSchedPhysRegJoin( 108 "disable-sched-physreg-join", cl::Hidden, cl::init(false), 109 cl::desc("Disable physreg def-use affinity")); 110 static cl::opt<bool> DisableSchedStalls( 111 "disable-sched-stalls", cl::Hidden, cl::init(true), 112 cl::desc("Disable no-stall priority in sched=list-ilp")); 113 static cl::opt<bool> DisableSchedCriticalPath( 114 "disable-sched-critical-path", cl::Hidden, cl::init(false), 115 cl::desc("Disable critical path priority in sched=list-ilp")); 116 static cl::opt<bool> DisableSchedHeight( 117 "disable-sched-height", cl::Hidden, cl::init(false), 118 cl::desc("Disable scheduled-height priority in sched=list-ilp")); 119 static cl::opt<bool> Disable2AddrHack( 120 "disable-2addr-hack", cl::Hidden, cl::init(true), 121 cl::desc("Disable scheduler's two-address hack")); 122 123 static cl::opt<int> MaxReorderWindow( 124 "max-sched-reorder", cl::Hidden, cl::init(6), 125 cl::desc("Number of instructions to allow ahead of the critical path " 126 "in sched=list-ilp")); 127 128 static cl::opt<unsigned> AvgIPC( 129 "sched-avg-ipc", cl::Hidden, cl::init(1), 130 cl::desc("Average inst/cycle whan no target itinerary exists.")); 131 132 namespace { 133 134 //===----------------------------------------------------------------------===// 135 /// ScheduleDAGRRList - The actual register reduction list scheduler 136 /// implementation. This supports both top-down and bottom-up scheduling. 137 /// 138 class ScheduleDAGRRList : public ScheduleDAGSDNodes { 139 private: 140 /// NeedLatency - True if the scheduler will make use of latency information. 141 bool NeedLatency; 142 143 /// AvailableQueue - The priority queue to use for the available SUnits. 144 SchedulingPriorityQueue *AvailableQueue; 145 146 /// PendingQueue - This contains all of the instructions whose operands have 147 /// been issued, but their results are not ready yet (due to the latency of 148 /// the operation). Once the operands becomes available, the instruction is 149 /// added to the AvailableQueue. 150 std::vector<SUnit *> PendingQueue; 151 152 /// HazardRec - The hazard recognizer to use. 153 ScheduleHazardRecognizer *HazardRec; 154 155 /// CurCycle - The current scheduler state corresponds to this cycle. 156 unsigned CurCycle = 0; 157 158 /// MinAvailableCycle - Cycle of the soonest available instruction. 159 unsigned MinAvailableCycle = ~0u; 160 161 /// IssueCount - Count instructions issued in this cycle 162 /// Currently valid only for bottom-up scheduling. 163 unsigned IssueCount = 0u; 164 165 /// LiveRegDefs - A set of physical registers and their definition 166 /// that are "live". These nodes must be scheduled before any other nodes that 167 /// modifies the registers can be scheduled. 168 unsigned NumLiveRegs = 0u; 169 std::unique_ptr<SUnit*[]> LiveRegDefs; 170 std::unique_ptr<SUnit*[]> LiveRegGens; 171 172 // Collect interferences between physical register use/defs. 173 // Each interference is an SUnit and set of physical registers. 174 SmallVector<SUnit*, 4> Interferences; 175 176 using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>; 177 178 LRegsMapT LRegsMap; 179 180 /// Topo - A topological ordering for SUnits which permits fast IsReachable 181 /// and similar queries. 182 ScheduleDAGTopologicalSort Topo; 183 184 // Hack to keep track of the inverse of FindCallSeqStart without more crazy 185 // DAG crawling. 186 DenseMap<SUnit*, SUnit*> CallSeqEndForStart; 187 188 public: 189 ScheduleDAGRRList(MachineFunction &mf, bool needlatency, 190 SchedulingPriorityQueue *availqueue, 191 CodeGenOpt::Level OptLevel) 192 : ScheduleDAGSDNodes(mf), 193 NeedLatency(needlatency), AvailableQueue(availqueue), 194 Topo(SUnits, nullptr) { 195 const TargetSubtargetInfo &STI = mf.getSubtarget(); 196 if (DisableSchedCycles || !NeedLatency) 197 HazardRec = new ScheduleHazardRecognizer(); 198 else 199 HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); 200 } 201 202 ~ScheduleDAGRRList() override { 203 delete HazardRec; 204 delete AvailableQueue; 205 } 206 207 void Schedule() override; 208 209 ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } 210 211 /// IsReachable - Checks if SU is reachable from TargetSU. 212 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 213 return Topo.IsReachable(SU, TargetSU); 214 } 215 216 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 217 /// create a cycle. 218 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 219 return Topo.WillCreateCycle(SU, TargetSU); 220 } 221 222 /// AddPredQueued - Queues and update to add a predecessor edge to SUnit SU. 223 /// This returns true if this is a new predecessor. 224 /// Does *NOT* update the topological ordering! It just queues an update. 225 void AddPredQueued(SUnit *SU, const SDep &D) { 226 Topo.AddPredQueued(SU, D.getSUnit()); 227 SU->addPred(D); 228 } 229 230 /// AddPred - adds a predecessor edge to SUnit SU. 231 /// This returns true if this is a new predecessor. 232 /// Updates the topological ordering if required. 233 void AddPred(SUnit *SU, const SDep &D) { 234 Topo.AddPred(SU, D.getSUnit()); 235 SU->addPred(D); 236 } 237 238 /// RemovePred - removes a predecessor edge from SUnit SU. 239 /// This returns true if an edge was removed. 240 /// Updates the topological ordering if required. 241 void RemovePred(SUnit *SU, const SDep &D) { 242 Topo.RemovePred(SU, D.getSUnit()); 243 SU->removePred(D); 244 } 245 246 private: 247 bool isReady(SUnit *SU) { 248 return DisableSchedCycles || !AvailableQueue->hasReadyFilter() || 249 AvailableQueue->isReady(SU); 250 } 251 252 void ReleasePred(SUnit *SU, const SDep *PredEdge); 253 void ReleasePredecessors(SUnit *SU); 254 void ReleasePending(); 255 void AdvanceToCycle(unsigned NextCycle); 256 void AdvancePastStalls(SUnit *SU); 257 void EmitNode(SUnit *SU); 258 void ScheduleNodeBottomUp(SUnit*); 259 void CapturePred(SDep *PredEdge); 260 void UnscheduleNodeBottomUp(SUnit*); 261 void RestoreHazardCheckerBottomUp(); 262 void BacktrackBottomUp(SUnit*, SUnit*); 263 SUnit *TryUnfoldSU(SUnit *); 264 SUnit *CopyAndMoveSuccessors(SUnit*); 265 void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 266 const TargetRegisterClass*, 267 const TargetRegisterClass*, 268 SmallVectorImpl<SUnit*>&); 269 bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); 270 271 void releaseInterferences(unsigned Reg = 0); 272 273 SUnit *PickNodeToScheduleBottomUp(); 274 void ListScheduleBottomUp(); 275 276 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 277 SUnit *CreateNewSUnit(SDNode *N) { 278 unsigned NumSUnits = SUnits.size(); 279 SUnit *NewNode = newSUnit(N); 280 // Update the topological ordering. 281 if (NewNode->NodeNum >= NumSUnits) 282 Topo.AddSUnitWithoutPredecessors(NewNode); 283 return NewNode; 284 } 285 286 /// CreateClone - Creates a new SUnit from an existing one. 287 SUnit *CreateClone(SUnit *N) { 288 unsigned NumSUnits = SUnits.size(); 289 SUnit *NewNode = Clone(N); 290 // Update the topological ordering. 291 if (NewNode->NodeNum >= NumSUnits) 292 Topo.AddSUnitWithoutPredecessors(NewNode); 293 return NewNode; 294 } 295 296 /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't 297 /// need actual latency information but the hybrid scheduler does. 298 bool forceUnitLatencies() const override { 299 return !NeedLatency; 300 } 301 }; 302 303 } // end anonymous namespace 304 305 static constexpr unsigned RegSequenceCost = 1; 306 307 /// GetCostForDef - Looks up the register class and cost for a given definition. 308 /// Typically this just means looking up the representative register class, 309 /// but for untyped values (MVT::Untyped) it means inspecting the node's 310 /// opcode to determine what register class is being generated. 311 static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, 312 const TargetLowering *TLI, 313 const TargetInstrInfo *TII, 314 const TargetRegisterInfo *TRI, 315 unsigned &RegClass, unsigned &Cost, 316 const MachineFunction &MF) { 317 MVT VT = RegDefPos.GetValue(); 318 319 // Special handling for untyped values. These values can only come from 320 // the expansion of custom DAG-to-DAG patterns. 321 if (VT == MVT::Untyped) { 322 const SDNode *Node = RegDefPos.GetNode(); 323 324 // Special handling for CopyFromReg of untyped values. 325 if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) { 326 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); 327 const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); 328 RegClass = RC->getID(); 329 Cost = 1; 330 return; 331 } 332 333 unsigned Opcode = Node->getMachineOpcode(); 334 if (Opcode == TargetOpcode::REG_SEQUENCE) { 335 unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 336 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); 337 RegClass = RC->getID(); 338 Cost = RegSequenceCost; 339 return; 340 } 341 342 unsigned Idx = RegDefPos.GetIdx(); 343 const MCInstrDesc &Desc = TII->get(Opcode); 344 const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); 345 assert(RC && "Not a valid register class"); 346 RegClass = RC->getID(); 347 // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a 348 // better way to determine it. 349 Cost = 1; 350 } else { 351 RegClass = TLI->getRepRegClassFor(VT)->getID(); 352 Cost = TLI->getRepRegClassCostFor(VT); 353 } 354 } 355 356 /// Schedule - Schedule the DAG using list scheduling. 357 void ScheduleDAGRRList::Schedule() { 358 LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) 359 << " '" << BB->getName() << "' **********\n"); 360 361 CurCycle = 0; 362 IssueCount = 0; 363 MinAvailableCycle = 364 DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max(); 365 NumLiveRegs = 0; 366 // Allocate slots for each physical register, plus one for a special register 367 // to track the virtual resource of a calling sequence. 368 LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]()); 369 LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]()); 370 CallSeqEndForStart.clear(); 371 assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); 372 373 // Build the scheduling graph. 374 BuildSchedGraph(nullptr); 375 376 LLVM_DEBUG(dump()); 377 Topo.MarkDirty(); 378 379 AvailableQueue->initNodes(SUnits); 380 381 HazardRec->Reset(); 382 383 // Execute the actual scheduling loop. 384 ListScheduleBottomUp(); 385 386 AvailableQueue->releaseState(); 387 388 LLVM_DEBUG({ 389 dbgs() << "*** Final schedule ***\n"; 390 dumpSchedule(); 391 dbgs() << '\n'; 392 }); 393 } 394 395 //===----------------------------------------------------------------------===// 396 // Bottom-Up Scheduling 397 //===----------------------------------------------------------------------===// 398 399 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 400 /// the AvailableQueue if the count reaches zero. Also update its cycle bound. 401 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { 402 SUnit *PredSU = PredEdge->getSUnit(); 403 404 #ifndef NDEBUG 405 if (PredSU->NumSuccsLeft == 0) { 406 dbgs() << "*** Scheduling failed! ***\n"; 407 dumpNode(*PredSU); 408 dbgs() << " has been released too many times!\n"; 409 llvm_unreachable(nullptr); 410 } 411 #endif 412 --PredSU->NumSuccsLeft; 413 414 if (!forceUnitLatencies()) { 415 // Updating predecessor's height. This is now the cycle when the 416 // predecessor can be scheduled without causing a pipeline stall. 417 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); 418 } 419 420 // If all the node's successors are scheduled, this node is ready 421 // to be scheduled. Ignore the special EntrySU node. 422 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 423 PredSU->isAvailable = true; 424 425 unsigned Height = PredSU->getHeight(); 426 if (Height < MinAvailableCycle) 427 MinAvailableCycle = Height; 428 429 if (isReady(PredSU)) { 430 AvailableQueue->push(PredSU); 431 } 432 // CapturePred and others may have left the node in the pending queue, avoid 433 // adding it twice. 434 else if (!PredSU->isPending) { 435 PredSU->isPending = true; 436 PendingQueue.push_back(PredSU); 437 } 438 } 439 } 440 441 /// IsChainDependent - Test if Outer is reachable from Inner through 442 /// chain dependencies. 443 static bool IsChainDependent(SDNode *Outer, SDNode *Inner, 444 unsigned NestLevel, 445 const TargetInstrInfo *TII) { 446 SDNode *N = Outer; 447 while (true) { 448 if (N == Inner) 449 return true; 450 // For a TokenFactor, examine each operand. There may be multiple ways 451 // to get to the CALLSEQ_BEGIN, but we need to find the path with the 452 // most nesting in order to ensure that we find the corresponding match. 453 if (N->getOpcode() == ISD::TokenFactor) { 454 for (const SDValue &Op : N->op_values()) 455 if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII)) 456 return true; 457 return false; 458 } 459 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. 460 if (N->isMachineOpcode()) { 461 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { 462 ++NestLevel; 463 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { 464 if (NestLevel == 0) 465 return false; 466 --NestLevel; 467 } 468 } 469 // Otherwise, find the chain and continue climbing. 470 for (const SDValue &Op : N->op_values()) 471 if (Op.getValueType() == MVT::Other) { 472 N = Op.getNode(); 473 goto found_chain_operand; 474 } 475 return false; 476 found_chain_operand:; 477 if (N->getOpcode() == ISD::EntryToken) 478 return false; 479 } 480 } 481 482 /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate 483 /// the corresponding (lowered) CALLSEQ_BEGIN node. 484 /// 485 /// NestLevel and MaxNested are used in recursion to indcate the current level 486 /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum 487 /// level seen so far. 488 /// 489 /// TODO: It would be better to give CALLSEQ_END an explicit operand to point 490 /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. 491 static SDNode * 492 FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, 493 const TargetInstrInfo *TII) { 494 while (true) { 495 // For a TokenFactor, examine each operand. There may be multiple ways 496 // to get to the CALLSEQ_BEGIN, but we need to find the path with the 497 // most nesting in order to ensure that we find the corresponding match. 498 if (N->getOpcode() == ISD::TokenFactor) { 499 SDNode *Best = nullptr; 500 unsigned BestMaxNest = MaxNest; 501 for (const SDValue &Op : N->op_values()) { 502 unsigned MyNestLevel = NestLevel; 503 unsigned MyMaxNest = MaxNest; 504 if (SDNode *New = FindCallSeqStart(Op.getNode(), 505 MyNestLevel, MyMaxNest, TII)) 506 if (!Best || (MyMaxNest > BestMaxNest)) { 507 Best = New; 508 BestMaxNest = MyMaxNest; 509 } 510 } 511 assert(Best); 512 MaxNest = BestMaxNest; 513 return Best; 514 } 515 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. 516 if (N->isMachineOpcode()) { 517 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { 518 ++NestLevel; 519 MaxNest = std::max(MaxNest, NestLevel); 520 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { 521 assert(NestLevel != 0); 522 --NestLevel; 523 if (NestLevel == 0) 524 return N; 525 } 526 } 527 // Otherwise, find the chain and continue climbing. 528 for (const SDValue &Op : N->op_values()) 529 if (Op.getValueType() == MVT::Other) { 530 N = Op.getNode(); 531 goto found_chain_operand; 532 } 533 return nullptr; 534 found_chain_operand:; 535 if (N->getOpcode() == ISD::EntryToken) 536 return nullptr; 537 } 538 } 539 540 /// Call ReleasePred for each predecessor, then update register live def/gen. 541 /// Always update LiveRegDefs for a register dependence even if the current SU 542 /// also defines the register. This effectively create one large live range 543 /// across a sequence of two-address node. This is important because the 544 /// entire chain must be scheduled together. Example: 545 /// 546 /// flags = (3) add 547 /// flags = (2) addc flags 548 /// flags = (1) addc flags 549 /// 550 /// results in 551 /// 552 /// LiveRegDefs[flags] = 3 553 /// LiveRegGens[flags] = 1 554 /// 555 /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid 556 /// interference on flags. 557 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { 558 // Bottom up: release predecessors 559 for (SDep &Pred : SU->Preds) { 560 ReleasePred(SU, &Pred); 561 if (Pred.isAssignedRegDep()) { 562 // This is a physical register dependency and it's impossible or 563 // expensive to copy the register. Make sure nothing that can 564 // clobber the register is scheduled between the predecessor and 565 // this node. 566 SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef; 567 assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) && 568 "interference on register dependence"); 569 LiveRegDefs[Pred.getReg()] = Pred.getSUnit(); 570 if (!LiveRegGens[Pred.getReg()]) { 571 ++NumLiveRegs; 572 LiveRegGens[Pred.getReg()] = SU; 573 } 574 } 575 } 576 577 // If we're scheduling a lowered CALLSEQ_END, find the corresponding 578 // CALLSEQ_BEGIN. Inject an artificial physical register dependence between 579 // these nodes, to prevent other calls from being interscheduled with them. 580 unsigned CallResource = TRI->getNumRegs(); 581 if (!LiveRegDefs[CallResource]) 582 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) 583 if (Node->isMachineOpcode() && 584 Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { 585 unsigned NestLevel = 0; 586 unsigned MaxNest = 0; 587 SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); 588 assert(N && "Must find call sequence start"); 589 590 SUnit *Def = &SUnits[N->getNodeId()]; 591 CallSeqEndForStart[Def] = SU; 592 593 ++NumLiveRegs; 594 LiveRegDefs[CallResource] = Def; 595 LiveRegGens[CallResource] = SU; 596 break; 597 } 598 } 599 600 /// Check to see if any of the pending instructions are ready to issue. If 601 /// so, add them to the available queue. 602 void ScheduleDAGRRList::ReleasePending() { 603 if (DisableSchedCycles) { 604 assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); 605 return; 606 } 607 608 // If the available queue is empty, it is safe to reset MinAvailableCycle. 609 if (AvailableQueue->empty()) 610 MinAvailableCycle = std::numeric_limits<unsigned>::max(); 611 612 // Check to see if any of the pending instructions are ready to issue. If 613 // so, add them to the available queue. 614 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { 615 unsigned ReadyCycle = PendingQueue[i]->getHeight(); 616 if (ReadyCycle < MinAvailableCycle) 617 MinAvailableCycle = ReadyCycle; 618 619 if (PendingQueue[i]->isAvailable) { 620 if (!isReady(PendingQueue[i])) 621 continue; 622 AvailableQueue->push(PendingQueue[i]); 623 } 624 PendingQueue[i]->isPending = false; 625 PendingQueue[i] = PendingQueue.back(); 626 PendingQueue.pop_back(); 627 --i; --e; 628 } 629 } 630 631 /// Move the scheduler state forward by the specified number of Cycles. 632 void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { 633 if (NextCycle <= CurCycle) 634 return; 635 636 IssueCount = 0; 637 AvailableQueue->setCurCycle(NextCycle); 638 if (!HazardRec->isEnabled()) { 639 // Bypass lots of virtual calls in case of long latency. 640 CurCycle = NextCycle; 641 } 642 else { 643 for (; CurCycle != NextCycle; ++CurCycle) { 644 HazardRec->RecedeCycle(); 645 } 646 } 647 // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the 648 // available Q to release pending nodes at least once before popping. 649 ReleasePending(); 650 } 651 652 /// Move the scheduler state forward until the specified node's dependents are 653 /// ready and can be scheduled with no resource conflicts. 654 void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { 655 if (DisableSchedCycles) 656 return; 657 658 // FIXME: Nodes such as CopyFromReg probably should not advance the current 659 // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node 660 // has predecessors the cycle will be advanced when they are scheduled. 661 // But given the crude nature of modeling latency though such nodes, we 662 // currently need to treat these nodes like real instructions. 663 // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; 664 665 unsigned ReadyCycle = SU->getHeight(); 666 667 // Bump CurCycle to account for latency. We assume the latency of other 668 // available instructions may be hidden by the stall (not a full pipe stall). 669 // This updates the hazard recognizer's cycle before reserving resources for 670 // this instruction. 671 AdvanceToCycle(ReadyCycle); 672 673 // Calls are scheduled in their preceding cycle, so don't conflict with 674 // hazards from instructions after the call. EmitNode will reset the 675 // scoreboard state before emitting the call. 676 if (SU->isCall) 677 return; 678 679 // FIXME: For resource conflicts in very long non-pipelined stages, we 680 // should probably skip ahead here to avoid useless scoreboard checks. 681 int Stalls = 0; 682 while (true) { 683 ScheduleHazardRecognizer::HazardType HT = 684 HazardRec->getHazardType(SU, -Stalls); 685 686 if (HT == ScheduleHazardRecognizer::NoHazard) 687 break; 688 689 ++Stalls; 690 } 691 AdvanceToCycle(CurCycle + Stalls); 692 } 693 694 /// Record this SUnit in the HazardRecognizer. 695 /// Does not update CurCycle. 696 void ScheduleDAGRRList::EmitNode(SUnit *SU) { 697 if (!HazardRec->isEnabled()) 698 return; 699 700 // Check for phys reg copy. 701 if (!SU->getNode()) 702 return; 703 704 switch (SU->getNode()->getOpcode()) { 705 default: 706 assert(SU->getNode()->isMachineOpcode() && 707 "This target-independent node should not be scheduled."); 708 break; 709 case ISD::MERGE_VALUES: 710 case ISD::TokenFactor: 711 case ISD::LIFETIME_START: 712 case ISD::LIFETIME_END: 713 case ISD::CopyToReg: 714 case ISD::CopyFromReg: 715 case ISD::EH_LABEL: 716 // Noops don't affect the scoreboard state. Copies are likely to be 717 // removed. 718 return; 719 case ISD::INLINEASM: 720 case ISD::INLINEASM_BR: 721 // For inline asm, clear the pipeline state. 722 HazardRec->Reset(); 723 return; 724 } 725 if (SU->isCall) { 726 // Calls are scheduled with their preceding instructions. For bottom-up 727 // scheduling, clear the pipeline state before emitting. 728 HazardRec->Reset(); 729 } 730 731 HazardRec->EmitInstruction(SU); 732 } 733 734 static void resetVRegCycle(SUnit *SU); 735 736 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 737 /// count of its predecessors. If a predecessor pending count is zero, add it to 738 /// the Available queue. 739 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { 740 LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); 741 LLVM_DEBUG(dumpNode(*SU)); 742 743 #ifndef NDEBUG 744 if (CurCycle < SU->getHeight()) 745 LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight() 746 << "] pipeline stall!\n"); 747 #endif 748 749 // FIXME: Do not modify node height. It may interfere with 750 // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the 751 // node its ready cycle can aid heuristics, and after scheduling it can 752 // indicate the scheduled cycle. 753 SU->setHeightToAtLeast(CurCycle); 754 755 // Reserve resources for the scheduled instruction. 756 EmitNode(SU); 757 758 Sequence.push_back(SU); 759 760 AvailableQueue->scheduledNode(SU); 761 762 // If HazardRec is disabled, and each inst counts as one cycle, then 763 // advance CurCycle before ReleasePredecessors to avoid useless pushes to 764 // PendingQueue for schedulers that implement HasReadyFilter. 765 if (!HazardRec->isEnabled() && AvgIPC < 2) 766 AdvanceToCycle(CurCycle + 1); 767 768 // Update liveness of predecessors before successors to avoid treating a 769 // two-address node as a live range def. 770 ReleasePredecessors(SU); 771 772 // Release all the implicit physical register defs that are live. 773 for (SDep &Succ : SU->Succs) { 774 // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node. 775 if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) { 776 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 777 --NumLiveRegs; 778 LiveRegDefs[Succ.getReg()] = nullptr; 779 LiveRegGens[Succ.getReg()] = nullptr; 780 releaseInterferences(Succ.getReg()); 781 } 782 } 783 // Release the special call resource dependence, if this is the beginning 784 // of a call. 785 unsigned CallResource = TRI->getNumRegs(); 786 if (LiveRegDefs[CallResource] == SU) 787 for (const SDNode *SUNode = SU->getNode(); SUNode; 788 SUNode = SUNode->getGluedNode()) { 789 if (SUNode->isMachineOpcode() && 790 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { 791 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 792 --NumLiveRegs; 793 LiveRegDefs[CallResource] = nullptr; 794 LiveRegGens[CallResource] = nullptr; 795 releaseInterferences(CallResource); 796 } 797 } 798 799 resetVRegCycle(SU); 800 801 SU->isScheduled = true; 802 803 // Conditions under which the scheduler should eagerly advance the cycle: 804 // (1) No available instructions 805 // (2) All pipelines full, so available instructions must have hazards. 806 // 807 // If HazardRec is disabled, the cycle was pre-advanced before calling 808 // ReleasePredecessors. In that case, IssueCount should remain 0. 809 // 810 // Check AvailableQueue after ReleasePredecessors in case of zero latency. 811 if (HazardRec->isEnabled() || AvgIPC > 1) { 812 if (SU->getNode() && SU->getNode()->isMachineOpcode()) 813 ++IssueCount; 814 if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) 815 || (!HazardRec->isEnabled() && IssueCount == AvgIPC)) 816 AdvanceToCycle(CurCycle + 1); 817 } 818 } 819 820 /// CapturePred - This does the opposite of ReleasePred. Since SU is being 821 /// unscheduled, increase the succ left count of its predecessors. Remove 822 /// them from AvailableQueue if necessary. 823 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 824 SUnit *PredSU = PredEdge->getSUnit(); 825 if (PredSU->isAvailable) { 826 PredSU->isAvailable = false; 827 if (!PredSU->isPending) 828 AvailableQueue->remove(PredSU); 829 } 830 831 assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() && 832 "NumSuccsLeft will overflow!"); 833 ++PredSU->NumSuccsLeft; 834 } 835 836 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 837 /// its predecessor states to reflect the change. 838 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 839 LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); 840 LLVM_DEBUG(dumpNode(*SU)); 841 842 for (SDep &Pred : SU->Preds) { 843 CapturePred(&Pred); 844 if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){ 845 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 846 assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() && 847 "Physical register dependency violated?"); 848 --NumLiveRegs; 849 LiveRegDefs[Pred.getReg()] = nullptr; 850 LiveRegGens[Pred.getReg()] = nullptr; 851 releaseInterferences(Pred.getReg()); 852 } 853 } 854 855 // Reclaim the special call resource dependence, if this is the beginning 856 // of a call. 857 unsigned CallResource = TRI->getNumRegs(); 858 for (const SDNode *SUNode = SU->getNode(); SUNode; 859 SUNode = SUNode->getGluedNode()) { 860 if (SUNode->isMachineOpcode() && 861 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { 862 SUnit *SeqEnd = CallSeqEndForStart[SU]; 863 assert(SeqEnd && "Call sequence start/end must be known"); 864 assert(!LiveRegDefs[CallResource]); 865 assert(!LiveRegGens[CallResource]); 866 ++NumLiveRegs; 867 LiveRegDefs[CallResource] = SU; 868 LiveRegGens[CallResource] = SeqEnd; 869 } 870 } 871 872 // Release the special call resource dependence, if this is the end 873 // of a call. 874 if (LiveRegGens[CallResource] == SU) 875 for (const SDNode *SUNode = SU->getNode(); SUNode; 876 SUNode = SUNode->getGluedNode()) { 877 if (SUNode->isMachineOpcode() && 878 SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { 879 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 880 assert(LiveRegDefs[CallResource]); 881 assert(LiveRegGens[CallResource]); 882 --NumLiveRegs; 883 LiveRegDefs[CallResource] = nullptr; 884 LiveRegGens[CallResource] = nullptr; 885 releaseInterferences(CallResource); 886 } 887 } 888 889 for (auto &Succ : SU->Succs) { 890 if (Succ.isAssignedRegDep()) { 891 auto Reg = Succ.getReg(); 892 if (!LiveRegDefs[Reg]) 893 ++NumLiveRegs; 894 // This becomes the nearest def. Note that an earlier def may still be 895 // pending if this is a two-address node. 896 LiveRegDefs[Reg] = SU; 897 898 // Update LiveRegGen only if was empty before this unscheduling. 899 // This is to avoid incorrect updating LiveRegGen set in previous run. 900 if (!LiveRegGens[Reg]) { 901 // Find the successor with the lowest height. 902 LiveRegGens[Reg] = Succ.getSUnit(); 903 for (auto &Succ2 : SU->Succs) { 904 if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg && 905 Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight()) 906 LiveRegGens[Reg] = Succ2.getSUnit(); 907 } 908 } 909 } 910 } 911 if (SU->getHeight() < MinAvailableCycle) 912 MinAvailableCycle = SU->getHeight(); 913 914 SU->setHeightDirty(); 915 SU->isScheduled = false; 916 SU->isAvailable = true; 917 if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) { 918 // Don't make available until backtracking is complete. 919 SU->isPending = true; 920 PendingQueue.push_back(SU); 921 } 922 else { 923 AvailableQueue->push(SU); 924 } 925 AvailableQueue->unscheduledNode(SU); 926 } 927 928 /// After backtracking, the hazard checker needs to be restored to a state 929 /// corresponding the current cycle. 930 void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { 931 HazardRec->Reset(); 932 933 unsigned LookAhead = std::min((unsigned)Sequence.size(), 934 HazardRec->getMaxLookAhead()); 935 if (LookAhead == 0) 936 return; 937 938 std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead); 939 unsigned HazardCycle = (*I)->getHeight(); 940 for (auto E = Sequence.end(); I != E; ++I) { 941 SUnit *SU = *I; 942 for (; SU->getHeight() > HazardCycle; ++HazardCycle) { 943 HazardRec->RecedeCycle(); 944 } 945 EmitNode(SU); 946 } 947 } 948 949 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 950 /// BTCycle in order to schedule a specific node. 951 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { 952 SUnit *OldSU = Sequence.back(); 953 while (true) { 954 Sequence.pop_back(); 955 // FIXME: use ready cycle instead of height 956 CurCycle = OldSU->getHeight(); 957 UnscheduleNodeBottomUp(OldSU); 958 AvailableQueue->setCurCycle(CurCycle); 959 if (OldSU == BtSU) 960 break; 961 OldSU = Sequence.back(); 962 } 963 964 assert(!SU->isSucc(OldSU) && "Something is wrong!"); 965 966 RestoreHazardCheckerBottomUp(); 967 968 ReleasePending(); 969 970 ++NumBacktracks; 971 } 972 973 static bool isOperandOf(const SUnit *SU, SDNode *N) { 974 for (const SDNode *SUNode = SU->getNode(); SUNode; 975 SUNode = SUNode->getGluedNode()) { 976 if (SUNode->isOperandOf(N)) 977 return true; 978 } 979 return false; 980 } 981 982 /// TryUnfold - Attempt to unfold 983 SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { 984 SDNode *N = SU->getNode(); 985 // Use while over if to ease fall through. 986 SmallVector<SDNode *, 2> NewNodes; 987 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 988 return nullptr; 989 990 // unfolding an x86 DEC64m operation results in store, dec, load which 991 // can't be handled here so quit 992 if (NewNodes.size() == 3) 993 return nullptr; 994 995 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 996 997 N = NewNodes[1]; 998 SDNode *LoadNode = NewNodes[0]; 999 unsigned NumVals = N->getNumValues(); 1000 unsigned OldNumVals = SU->getNode()->getNumValues(); 1001 1002 // LoadNode may already exist. This can happen when there is another 1003 // load from the same location and producing the same type of value 1004 // but it has different alignment or volatileness. 1005 bool isNewLoad = true; 1006 SUnit *LoadSU; 1007 if (LoadNode->getNodeId() != -1) { 1008 LoadSU = &SUnits[LoadNode->getNodeId()]; 1009 // If LoadSU has already been scheduled, we should clone it but 1010 // this would negate the benefit to unfolding so just return SU. 1011 if (LoadSU->isScheduled) 1012 return SU; 1013 isNewLoad = false; 1014 } else { 1015 LoadSU = CreateNewSUnit(LoadNode); 1016 LoadNode->setNodeId(LoadSU->NodeNum); 1017 1018 InitNumRegDefsLeft(LoadSU); 1019 computeLatency(LoadSU); 1020 } 1021 1022 bool isNewN = true; 1023 SUnit *NewSU; 1024 // This can only happen when isNewLoad is false. 1025 if (N->getNodeId() != -1) { 1026 NewSU = &SUnits[N->getNodeId()]; 1027 // If NewSU has already been scheduled, we need to clone it, but this 1028 // negates the benefit to unfolding so just return SU. 1029 if (NewSU->isScheduled) { 1030 return SU; 1031 } 1032 isNewN = false; 1033 } else { 1034 NewSU = CreateNewSUnit(N); 1035 N->setNodeId(NewSU->NodeNum); 1036 1037 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 1038 for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { 1039 if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { 1040 NewSU->isTwoAddress = true; 1041 break; 1042 } 1043 } 1044 if (MCID.isCommutable()) 1045 NewSU->isCommutable = true; 1046 1047 InitNumRegDefsLeft(NewSU); 1048 computeLatency(NewSU); 1049 } 1050 1051 LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); 1052 1053 // Now that we are committed to unfolding replace DAG Uses. 1054 for (unsigned i = 0; i != NumVals; ++i) 1055 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 1056 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1), 1057 SDValue(LoadNode, 1)); 1058 1059 // Record all the edges to and from the old SU, by category. 1060 SmallVector<SDep, 4> ChainPreds; 1061 SmallVector<SDep, 4> ChainSuccs; 1062 SmallVector<SDep, 4> LoadPreds; 1063 SmallVector<SDep, 4> NodePreds; 1064 SmallVector<SDep, 4> NodeSuccs; 1065 for (SDep &Pred : SU->Preds) { 1066 if (Pred.isCtrl()) 1067 ChainPreds.push_back(Pred); 1068 else if (isOperandOf(Pred.getSUnit(), LoadNode)) 1069 LoadPreds.push_back(Pred); 1070 else 1071 NodePreds.push_back(Pred); 1072 } 1073 for (SDep &Succ : SU->Succs) { 1074 if (Succ.isCtrl()) 1075 ChainSuccs.push_back(Succ); 1076 else 1077 NodeSuccs.push_back(Succ); 1078 } 1079 1080 // Now assign edges to the newly-created nodes. 1081 for (const SDep &Pred : ChainPreds) { 1082 RemovePred(SU, Pred); 1083 if (isNewLoad) 1084 AddPredQueued(LoadSU, Pred); 1085 } 1086 for (const SDep &Pred : LoadPreds) { 1087 RemovePred(SU, Pred); 1088 if (isNewLoad) 1089 AddPredQueued(LoadSU, Pred); 1090 } 1091 for (const SDep &Pred : NodePreds) { 1092 RemovePred(SU, Pred); 1093 AddPredQueued(NewSU, Pred); 1094 } 1095 for (SDep &D : NodeSuccs) { 1096 SUnit *SuccDep = D.getSUnit(); 1097 D.setSUnit(SU); 1098 RemovePred(SuccDep, D); 1099 D.setSUnit(NewSU); 1100 AddPredQueued(SuccDep, D); 1101 // Balance register pressure. 1102 if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled && 1103 !D.isCtrl() && NewSU->NumRegDefsLeft > 0) 1104 --NewSU->NumRegDefsLeft; 1105 } 1106 for (SDep &D : ChainSuccs) { 1107 SUnit *SuccDep = D.getSUnit(); 1108 D.setSUnit(SU); 1109 RemovePred(SuccDep, D); 1110 if (isNewLoad) { 1111 D.setSUnit(LoadSU); 1112 AddPredQueued(SuccDep, D); 1113 } 1114 } 1115 1116 // Add a data dependency to reflect that NewSU reads the value defined 1117 // by LoadSU. 1118 SDep D(LoadSU, SDep::Data, 0); 1119 D.setLatency(LoadSU->Latency); 1120 AddPredQueued(NewSU, D); 1121 1122 if (isNewLoad) 1123 AvailableQueue->addNode(LoadSU); 1124 if (isNewN) 1125 AvailableQueue->addNode(NewSU); 1126 1127 ++NumUnfolds; 1128 1129 if (NewSU->NumSuccsLeft == 0) 1130 NewSU->isAvailable = true; 1131 1132 return NewSU; 1133 } 1134 1135 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 1136 /// successors to the newly created node. 1137 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 1138 SDNode *N = SU->getNode(); 1139 if (!N) 1140 return nullptr; 1141 1142 LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n"); 1143 LLVM_DEBUG(dumpNode(*SU)); 1144 1145 if (N->getGluedNode() && 1146 !TII->canCopyGluedNodeDuringSchedule(N)) { 1147 LLVM_DEBUG( 1148 dbgs() 1149 << "Giving up because it has incoming glue and the target does not " 1150 "want to copy it\n"); 1151 return nullptr; 1152 } 1153 1154 SUnit *NewSU; 1155 bool TryUnfold = false; 1156 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 1157 MVT VT = N->getSimpleValueType(i); 1158 if (VT == MVT::Glue) { 1159 LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n"); 1160 return nullptr; 1161 } else if (VT == MVT::Other) 1162 TryUnfold = true; 1163 } 1164 for (const SDValue &Op : N->op_values()) { 1165 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 1166 if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) { 1167 LLVM_DEBUG( 1168 dbgs() << "Giving up because it one of the operands is glue and " 1169 "the target does not want to copy it\n"); 1170 return nullptr; 1171 } 1172 } 1173 1174 // If possible unfold instruction. 1175 if (TryUnfold) { 1176 SUnit *UnfoldSU = TryUnfoldSU(SU); 1177 if (!UnfoldSU) 1178 return nullptr; 1179 SU = UnfoldSU; 1180 N = SU->getNode(); 1181 // If this can be scheduled don't bother duplicating and just return 1182 if (SU->NumSuccsLeft == 0) 1183 return SU; 1184 } 1185 1186 LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); 1187 NewSU = CreateClone(SU); 1188 1189 // New SUnit has the exact same predecessors. 1190 for (SDep &Pred : SU->Preds) 1191 if (!Pred.isArtificial()) 1192 AddPredQueued(NewSU, Pred); 1193 1194 // Make sure the clone comes after the original. (InstrEmitter assumes 1195 // this ordering.) 1196 AddPredQueued(NewSU, SDep(SU, SDep::Artificial)); 1197 1198 // Only copy scheduled successors. Cut them from old node's successor 1199 // list and move them over. 1200 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 1201 for (SDep &Succ : SU->Succs) { 1202 if (Succ.isArtificial()) 1203 continue; 1204 SUnit *SuccSU = Succ.getSUnit(); 1205 if (SuccSU->isScheduled) { 1206 SDep D = Succ; 1207 D.setSUnit(NewSU); 1208 AddPredQueued(SuccSU, D); 1209 D.setSUnit(SU); 1210 DelDeps.emplace_back(SuccSU, D); 1211 } 1212 } 1213 for (const auto &[DelSU, DelD] : DelDeps) 1214 RemovePred(DelSU, DelD); 1215 1216 AvailableQueue->updateNode(SU); 1217 AvailableQueue->addNode(NewSU); 1218 1219 ++NumDups; 1220 return NewSU; 1221 } 1222 1223 /// InsertCopiesAndMoveSuccs - Insert register copies and move all 1224 /// scheduled successors of the given SUnit to the last copy. 1225 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 1226 const TargetRegisterClass *DestRC, 1227 const TargetRegisterClass *SrcRC, 1228 SmallVectorImpl<SUnit*> &Copies) { 1229 SUnit *CopyFromSU = CreateNewSUnit(nullptr); 1230 CopyFromSU->CopySrcRC = SrcRC; 1231 CopyFromSU->CopyDstRC = DestRC; 1232 1233 SUnit *CopyToSU = CreateNewSUnit(nullptr); 1234 CopyToSU->CopySrcRC = DestRC; 1235 CopyToSU->CopyDstRC = SrcRC; 1236 1237 // Only copy scheduled successors. Cut them from old node's successor 1238 // list and move them over. 1239 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 1240 for (SDep &Succ : SU->Succs) { 1241 if (Succ.isArtificial()) 1242 continue; 1243 SUnit *SuccSU = Succ.getSUnit(); 1244 if (SuccSU->isScheduled) { 1245 SDep D = Succ; 1246 D.setSUnit(CopyToSU); 1247 AddPredQueued(SuccSU, D); 1248 DelDeps.emplace_back(SuccSU, Succ); 1249 } 1250 else { 1251 // Avoid scheduling the def-side copy before other successors. Otherwise, 1252 // we could introduce another physreg interference on the copy and 1253 // continue inserting copies indefinitely. 1254 AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial)); 1255 } 1256 } 1257 for (const auto &[DelSU, DelD] : DelDeps) 1258 RemovePred(DelSU, DelD); 1259 1260 SDep FromDep(SU, SDep::Data, Reg); 1261 FromDep.setLatency(SU->Latency); 1262 AddPredQueued(CopyFromSU, FromDep); 1263 SDep ToDep(CopyFromSU, SDep::Data, 0); 1264 ToDep.setLatency(CopyFromSU->Latency); 1265 AddPredQueued(CopyToSU, ToDep); 1266 1267 AvailableQueue->updateNode(SU); 1268 AvailableQueue->addNode(CopyFromSU); 1269 AvailableQueue->addNode(CopyToSU); 1270 Copies.push_back(CopyFromSU); 1271 Copies.push_back(CopyToSU); 1272 1273 ++NumPRCopies; 1274 } 1275 1276 /// getPhysicalRegisterVT - Returns the ValueType of the physical register 1277 /// definition of the specified node. 1278 /// FIXME: Move to SelectionDAG? 1279 static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 1280 const TargetInstrInfo *TII) { 1281 unsigned NumRes; 1282 if (N->getOpcode() == ISD::CopyFromReg) { 1283 // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. 1284 NumRes = 1; 1285 } else { 1286 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 1287 assert(!MCID.implicit_defs().empty() && 1288 "Physical reg def must be in implicit def list!"); 1289 NumRes = MCID.getNumDefs(); 1290 for (MCPhysReg ImpDef : MCID.implicit_defs()) { 1291 if (Reg == ImpDef) 1292 break; 1293 ++NumRes; 1294 } 1295 } 1296 return N->getSimpleValueType(NumRes); 1297 } 1298 1299 /// CheckForLiveRegDef - Return true and update live register vector if the 1300 /// specified register def of the specified SUnit clobbers any "live" registers. 1301 static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs, 1302 SmallSet<unsigned, 4> &RegAdded, 1303 SmallVectorImpl<unsigned> &LRegs, 1304 const TargetRegisterInfo *TRI, 1305 const SDNode *Node = nullptr) { 1306 for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { 1307 1308 // Check if Ref is live. 1309 if (!LiveRegDefs[*AliasI]) continue; 1310 1311 // Allow multiple uses of the same def. 1312 if (LiveRegDefs[*AliasI] == SU) continue; 1313 1314 // Allow multiple uses of same def 1315 if (Node && LiveRegDefs[*AliasI]->getNode() == Node) 1316 continue; 1317 1318 // Add Reg to the set of interfering live regs. 1319 if (RegAdded.insert(*AliasI).second) { 1320 LRegs.push_back(*AliasI); 1321 } 1322 } 1323 } 1324 1325 /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered 1326 /// by RegMask, and add them to LRegs. 1327 static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, 1328 ArrayRef<SUnit*> LiveRegDefs, 1329 SmallSet<unsigned, 4> &RegAdded, 1330 SmallVectorImpl<unsigned> &LRegs) { 1331 // Look at all live registers. Skip Reg0 and the special CallResource. 1332 for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { 1333 if (!LiveRegDefs[i]) continue; 1334 if (LiveRegDefs[i] == SU) continue; 1335 if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; 1336 if (RegAdded.insert(i).second) 1337 LRegs.push_back(i); 1338 } 1339 } 1340 1341 /// getNodeRegMask - Returns the register mask attached to an SDNode, if any. 1342 static const uint32_t *getNodeRegMask(const SDNode *N) { 1343 for (const SDValue &Op : N->op_values()) 1344 if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode())) 1345 return RegOp->getRegMask(); 1346 return nullptr; 1347 } 1348 1349 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 1350 /// scheduling of the given node to satisfy live physical register dependencies. 1351 /// If the specific node is the last one that's available to schedule, do 1352 /// whatever is necessary (i.e. backtracking or cloning) to make it possible. 1353 bool ScheduleDAGRRList:: 1354 DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { 1355 if (NumLiveRegs == 0) 1356 return false; 1357 1358 SmallSet<unsigned, 4> RegAdded; 1359 // If this node would clobber any "live" register, then it's not ready. 1360 // 1361 // If SU is the currently live definition of the same register that it uses, 1362 // then we are free to schedule it. 1363 for (SDep &Pred : SU->Preds) { 1364 if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU) 1365 CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(), 1366 RegAdded, LRegs, TRI); 1367 } 1368 1369 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { 1370 if (Node->getOpcode() == ISD::INLINEASM || 1371 Node->getOpcode() == ISD::INLINEASM_BR) { 1372 // Inline asm can clobber physical defs. 1373 unsigned NumOps = Node->getNumOperands(); 1374 if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) 1375 --NumOps; // Ignore the glue operand. 1376 1377 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { 1378 unsigned Flags = 1379 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); 1380 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); 1381 1382 ++i; // Skip the ID value. 1383 if (InlineAsm::isRegDefKind(Flags) || 1384 InlineAsm::isRegDefEarlyClobberKind(Flags) || 1385 InlineAsm::isClobberKind(Flags)) { 1386 // Check for def of register or earlyclobber register. 1387 for (; NumVals; --NumVals, ++i) { 1388 Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 1389 if (Reg.isPhysical()) 1390 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); 1391 } 1392 } else 1393 i += NumVals; 1394 } 1395 continue; 1396 } 1397 1398 if (Node->getOpcode() == ISD::CopyToReg) { 1399 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); 1400 if (Reg.isPhysical()) { 1401 SDNode *SrcNode = Node->getOperand(2).getNode(); 1402 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI, 1403 SrcNode); 1404 } 1405 } 1406 1407 if (!Node->isMachineOpcode()) 1408 continue; 1409 // If we're in the middle of scheduling a call, don't begin scheduling 1410 // another call. Also, don't allow any physical registers to be live across 1411 // the call. 1412 if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { 1413 // Check the special calling-sequence resource. 1414 unsigned CallResource = TRI->getNumRegs(); 1415 if (LiveRegDefs[CallResource]) { 1416 SDNode *Gen = LiveRegGens[CallResource]->getNode(); 1417 while (SDNode *Glued = Gen->getGluedNode()) 1418 Gen = Glued; 1419 if (!IsChainDependent(Gen, Node, 0, TII) && 1420 RegAdded.insert(CallResource).second) 1421 LRegs.push_back(CallResource); 1422 } 1423 } 1424 if (const uint32_t *RegMask = getNodeRegMask(Node)) 1425 CheckForLiveRegDefMasked(SU, RegMask, 1426 ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()), 1427 RegAdded, LRegs); 1428 1429 const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); 1430 if (MCID.hasOptionalDef()) { 1431 // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit. 1432 // This operand can be either a def of CPSR, if the S bit is set; or a use 1433 // of %noreg. When the OptionalDef is set to a valid register, we need to 1434 // handle it in the same way as an ImplicitDef. 1435 for (unsigned i = 0; i < MCID.getNumDefs(); ++i) 1436 if (MCID.operands()[i].isOptionalDef()) { 1437 const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues()); 1438 Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg(); 1439 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); 1440 } 1441 } 1442 for (MCPhysReg Reg : MCID.implicit_defs()) 1443 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); 1444 } 1445 1446 return !LRegs.empty(); 1447 } 1448 1449 void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { 1450 // Add the nodes that aren't ready back onto the available list. 1451 for (unsigned i = Interferences.size(); i > 0; --i) { 1452 SUnit *SU = Interferences[i-1]; 1453 LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); 1454 if (Reg) { 1455 SmallVectorImpl<unsigned> &LRegs = LRegsPos->second; 1456 if (!is_contained(LRegs, Reg)) 1457 continue; 1458 } 1459 SU->isPending = false; 1460 // The interfering node may no longer be available due to backtracking. 1461 // Furthermore, it may have been made available again, in which case it is 1462 // now already in the AvailableQueue. 1463 if (SU->isAvailable && !SU->NodeQueueId) { 1464 LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); 1465 AvailableQueue->push(SU); 1466 } 1467 if (i < Interferences.size()) 1468 Interferences[i-1] = Interferences.back(); 1469 Interferences.pop_back(); 1470 LRegsMap.erase(LRegsPos); 1471 } 1472 } 1473 1474 /// Return a node that can be scheduled in this cycle. Requirements: 1475 /// (1) Ready: latency has been satisfied 1476 /// (2) No Hazards: resources are available 1477 /// (3) No Interferences: may unschedule to break register interferences. 1478 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { 1479 SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop(); 1480 auto FindAvailableNode = [&]() { 1481 while (CurSU) { 1482 SmallVector<unsigned, 4> LRegs; 1483 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 1484 break; 1485 LLVM_DEBUG(dbgs() << " Interfering reg "; 1486 if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource"; 1487 else dbgs() << printReg(LRegs[0], TRI); 1488 dbgs() << " SU #" << CurSU->NodeNum << '\n'); 1489 auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs); 1490 if (LRegsInserted) { 1491 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 1492 Interferences.push_back(CurSU); 1493 } 1494 else { 1495 assert(CurSU->isPending && "Interferences are pending"); 1496 // Update the interference with current live regs. 1497 LRegsIter->second = LRegs; 1498 } 1499 CurSU = AvailableQueue->pop(); 1500 } 1501 }; 1502 FindAvailableNode(); 1503 if (CurSU) 1504 return CurSU; 1505 1506 // We query the topological order in the loop body, so make sure outstanding 1507 // updates are applied before entering it (we only enter the loop if there 1508 // are some interferences). If we make changes to the ordering, we exit 1509 // the loop. 1510 1511 // All candidates are delayed due to live physical reg dependencies. 1512 // Try backtracking, code duplication, or inserting cross class copies 1513 // to resolve it. 1514 for (SUnit *TrySU : Interferences) { 1515 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; 1516 1517 // Try unscheduling up to the point where it's safe to schedule 1518 // this node. 1519 SUnit *BtSU = nullptr; 1520 unsigned LiveCycle = std::numeric_limits<unsigned>::max(); 1521 for (unsigned Reg : LRegs) { 1522 if (LiveRegGens[Reg]->getHeight() < LiveCycle) { 1523 BtSU = LiveRegGens[Reg]; 1524 LiveCycle = BtSU->getHeight(); 1525 } 1526 } 1527 if (!WillCreateCycle(TrySU, BtSU)) { 1528 // BacktrackBottomUp mutates Interferences! 1529 BacktrackBottomUp(TrySU, BtSU); 1530 1531 // Force the current node to be scheduled before the node that 1532 // requires the physical reg dep. 1533 if (BtSU->isAvailable) { 1534 BtSU->isAvailable = false; 1535 if (!BtSU->isPending) 1536 AvailableQueue->remove(BtSU); 1537 } 1538 LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum 1539 << ") to SU(" << TrySU->NodeNum << ")\n"); 1540 AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial)); 1541 1542 // If one or more successors has been unscheduled, then the current 1543 // node is no longer available. 1544 if (!TrySU->isAvailable || !TrySU->NodeQueueId) { 1545 LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n"); 1546 CurSU = AvailableQueue->pop(); 1547 } else { 1548 LLVM_DEBUG(dbgs() << "TrySU available\n"); 1549 // Available and in AvailableQueue 1550 AvailableQueue->remove(TrySU); 1551 CurSU = TrySU; 1552 } 1553 FindAvailableNode(); 1554 // Interferences has been mutated. We must break. 1555 break; 1556 } 1557 } 1558 1559 if (!CurSU) { 1560 // Can't backtrack. If it's too expensive to copy the value, then try 1561 // duplicate the nodes that produces these "too expensive to copy" 1562 // values to break the dependency. In case even that doesn't work, 1563 // insert cross class copies. 1564 // If it's not too expensive, i.e. cost != -1, issue copies. 1565 SUnit *TrySU = Interferences[0]; 1566 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; 1567 assert(LRegs.size() == 1 && "Can't handle this yet!"); 1568 unsigned Reg = LRegs[0]; 1569 SUnit *LRDef = LiveRegDefs[Reg]; 1570 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 1571 const TargetRegisterClass *RC = 1572 TRI->getMinimalPhysRegClass(Reg, VT); 1573 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 1574 1575 // If cross copy register class is the same as RC, then it must be possible 1576 // copy the value directly. Do not try duplicate the def. 1577 // If cross copy register class is not the same as RC, then it's possible to 1578 // copy the value but it require cross register class copies and it is 1579 // expensive. 1580 // If cross copy register class is null, then it's not possible to copy 1581 // the value at all. 1582 SUnit *NewDef = nullptr; 1583 if (DestRC != RC) { 1584 NewDef = CopyAndMoveSuccessors(LRDef); 1585 if (!DestRC && !NewDef) 1586 report_fatal_error("Can't handle live physical register dependency!"); 1587 } 1588 if (!NewDef) { 1589 // Issue copies, these can be expensive cross register class copies. 1590 SmallVector<SUnit*, 2> Copies; 1591 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 1592 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum 1593 << " to SU #" << Copies.front()->NodeNum << "\n"); 1594 AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial)); 1595 NewDef = Copies.back(); 1596 } 1597 1598 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum 1599 << " to SU #" << TrySU->NodeNum << "\n"); 1600 LiveRegDefs[Reg] = NewDef; 1601 AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial)); 1602 TrySU->isAvailable = false; 1603 CurSU = NewDef; 1604 } 1605 assert(CurSU && "Unable to resolve live physical register dependencies!"); 1606 return CurSU; 1607 } 1608 1609 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 1610 /// schedulers. 1611 void ScheduleDAGRRList::ListScheduleBottomUp() { 1612 // Release any predecessors of the special Exit node. 1613 ReleasePredecessors(&ExitSU); 1614 1615 // Add root to Available queue. 1616 if (!SUnits.empty()) { 1617 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 1618 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 1619 RootSU->isAvailable = true; 1620 AvailableQueue->push(RootSU); 1621 } 1622 1623 // While Available queue is not empty, grab the node with the highest 1624 // priority. If it is not ready put it back. Schedule the node. 1625 Sequence.reserve(SUnits.size()); 1626 while (!AvailableQueue->empty() || !Interferences.empty()) { 1627 LLVM_DEBUG(dbgs() << "\nExamining Available:\n"; 1628 AvailableQueue->dump(this)); 1629 1630 // Pick the best node to schedule taking all constraints into 1631 // consideration. 1632 SUnit *SU = PickNodeToScheduleBottomUp(); 1633 1634 AdvancePastStalls(SU); 1635 1636 ScheduleNodeBottomUp(SU); 1637 1638 while (AvailableQueue->empty() && !PendingQueue.empty()) { 1639 // Advance the cycle to free resources. Skip ahead to the next ready SU. 1640 assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() && 1641 "MinAvailableCycle uninitialized"); 1642 AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); 1643 } 1644 } 1645 1646 // Reverse the order if it is bottom up. 1647 std::reverse(Sequence.begin(), Sequence.end()); 1648 1649 #ifndef NDEBUG 1650 VerifyScheduledSequence(/*isBottomUp=*/true); 1651 #endif 1652 } 1653 1654 namespace { 1655 1656 class RegReductionPQBase; 1657 1658 struct queue_sort { 1659 bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } 1660 }; 1661 1662 #ifndef NDEBUG 1663 template<class SF> 1664 struct reverse_sort : public queue_sort { 1665 SF &SortFunc; 1666 1667 reverse_sort(SF &sf) : SortFunc(sf) {} 1668 1669 bool operator()(SUnit* left, SUnit* right) const { 1670 // reverse left/right rather than simply !SortFunc(left, right) 1671 // to expose different paths in the comparison logic. 1672 return SortFunc(right, left); 1673 } 1674 }; 1675 #endif // NDEBUG 1676 1677 /// bu_ls_rr_sort - Priority function for bottom up register pressure 1678 // reduction scheduler. 1679 struct bu_ls_rr_sort : public queue_sort { 1680 enum { 1681 IsBottomUp = true, 1682 HasReadyFilter = false 1683 }; 1684 1685 RegReductionPQBase *SPQ; 1686 1687 bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} 1688 1689 bool operator()(SUnit* left, SUnit* right) const; 1690 }; 1691 1692 // src_ls_rr_sort - Priority function for source order scheduler. 1693 struct src_ls_rr_sort : public queue_sort { 1694 enum { 1695 IsBottomUp = true, 1696 HasReadyFilter = false 1697 }; 1698 1699 RegReductionPQBase *SPQ; 1700 1701 src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} 1702 1703 bool operator()(SUnit* left, SUnit* right) const; 1704 }; 1705 1706 // hybrid_ls_rr_sort - Priority function for hybrid scheduler. 1707 struct hybrid_ls_rr_sort : public queue_sort { 1708 enum { 1709 IsBottomUp = true, 1710 HasReadyFilter = false 1711 }; 1712 1713 RegReductionPQBase *SPQ; 1714 1715 hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} 1716 1717 bool isReady(SUnit *SU, unsigned CurCycle) const; 1718 1719 bool operator()(SUnit* left, SUnit* right) const; 1720 }; 1721 1722 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) 1723 // scheduler. 1724 struct ilp_ls_rr_sort : public queue_sort { 1725 enum { 1726 IsBottomUp = true, 1727 HasReadyFilter = false 1728 }; 1729 1730 RegReductionPQBase *SPQ; 1731 1732 ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} 1733 1734 bool isReady(SUnit *SU, unsigned CurCycle) const; 1735 1736 bool operator()(SUnit* left, SUnit* right) const; 1737 }; 1738 1739 class RegReductionPQBase : public SchedulingPriorityQueue { 1740 protected: 1741 std::vector<SUnit *> Queue; 1742 unsigned CurQueueId = 0; 1743 bool TracksRegPressure; 1744 bool SrcOrder; 1745 1746 // SUnits - The SUnits for the current graph. 1747 std::vector<SUnit> *SUnits = nullptr; 1748 1749 MachineFunction &MF; 1750 const TargetInstrInfo *TII = nullptr; 1751 const TargetRegisterInfo *TRI = nullptr; 1752 const TargetLowering *TLI = nullptr; 1753 ScheduleDAGRRList *scheduleDAG = nullptr; 1754 1755 // SethiUllmanNumbers - The SethiUllman number for each node. 1756 std::vector<unsigned> SethiUllmanNumbers; 1757 1758 /// RegPressure - Tracking current reg pressure per register class. 1759 std::vector<unsigned> RegPressure; 1760 1761 /// RegLimit - Tracking the number of allocatable registers per register 1762 /// class. 1763 std::vector<unsigned> RegLimit; 1764 1765 public: 1766 RegReductionPQBase(MachineFunction &mf, 1767 bool hasReadyFilter, 1768 bool tracksrp, 1769 bool srcorder, 1770 const TargetInstrInfo *tii, 1771 const TargetRegisterInfo *tri, 1772 const TargetLowering *tli) 1773 : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp), 1774 SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) { 1775 if (TracksRegPressure) { 1776 unsigned NumRC = TRI->getNumRegClasses(); 1777 RegLimit.resize(NumRC); 1778 RegPressure.resize(NumRC); 1779 std::fill(RegLimit.begin(), RegLimit.end(), 0); 1780 std::fill(RegPressure.begin(), RegPressure.end(), 0); 1781 for (const TargetRegisterClass *RC : TRI->regclasses()) 1782 RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF); 1783 } 1784 } 1785 1786 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1787 scheduleDAG = scheduleDag; 1788 } 1789 1790 ScheduleHazardRecognizer* getHazardRec() { 1791 return scheduleDAG->getHazardRec(); 1792 } 1793 1794 void initNodes(std::vector<SUnit> &sunits) override; 1795 1796 void addNode(const SUnit *SU) override; 1797 1798 void updateNode(const SUnit *SU) override; 1799 1800 void releaseState() override { 1801 SUnits = nullptr; 1802 SethiUllmanNumbers.clear(); 1803 std::fill(RegPressure.begin(), RegPressure.end(), 0); 1804 } 1805 1806 unsigned getNodePriority(const SUnit *SU) const; 1807 1808 unsigned getNodeOrdering(const SUnit *SU) const { 1809 if (!SU->getNode()) return 0; 1810 1811 return SU->getNode()->getIROrder(); 1812 } 1813 1814 bool empty() const override { return Queue.empty(); } 1815 1816 void push(SUnit *U) override { 1817 assert(!U->NodeQueueId && "Node in the queue already"); 1818 U->NodeQueueId = ++CurQueueId; 1819 Queue.push_back(U); 1820 } 1821 1822 void remove(SUnit *SU) override { 1823 assert(!Queue.empty() && "Queue is empty!"); 1824 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1825 std::vector<SUnit *>::iterator I = llvm::find(Queue, SU); 1826 if (I != std::prev(Queue.end())) 1827 std::swap(*I, Queue.back()); 1828 Queue.pop_back(); 1829 SU->NodeQueueId = 0; 1830 } 1831 1832 bool tracksRegPressure() const override { return TracksRegPressure; } 1833 1834 void dumpRegPressure() const; 1835 1836 bool HighRegPressure(const SUnit *SU) const; 1837 1838 bool MayReduceRegPressure(SUnit *SU) const; 1839 1840 int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; 1841 1842 void scheduledNode(SUnit *SU) override; 1843 1844 void unscheduledNode(SUnit *SU) override; 1845 1846 protected: 1847 bool canClobber(const SUnit *SU, const SUnit *Op); 1848 void AddPseudoTwoAddrDeps(); 1849 void PrescheduleNodesWithMultipleUses(); 1850 void CalculateSethiUllmanNumbers(); 1851 }; 1852 1853 template<class SF> 1854 static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) { 1855 unsigned BestIdx = 0; 1856 // Only compute the cost for the first 1000 items in the queue, to avoid 1857 // excessive compile-times for very large queues. 1858 for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E; 1859 I++) 1860 if (Picker(Q[BestIdx], Q[I])) 1861 BestIdx = I; 1862 SUnit *V = Q[BestIdx]; 1863 if (BestIdx + 1 != Q.size()) 1864 std::swap(Q[BestIdx], Q.back()); 1865 Q.pop_back(); 1866 return V; 1867 } 1868 1869 template<class SF> 1870 SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) { 1871 #ifndef NDEBUG 1872 if (DAG->StressSched) { 1873 reverse_sort<SF> RPicker(Picker); 1874 return popFromQueueImpl(Q, RPicker); 1875 } 1876 #endif 1877 (void)DAG; 1878 return popFromQueueImpl(Q, Picker); 1879 } 1880 1881 //===----------------------------------------------------------------------===// 1882 // RegReductionPriorityQueue Definition 1883 //===----------------------------------------------------------------------===// 1884 // 1885 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 1886 // to reduce register pressure. 1887 // 1888 template<class SF> 1889 class RegReductionPriorityQueue : public RegReductionPQBase { 1890 SF Picker; 1891 1892 public: 1893 RegReductionPriorityQueue(MachineFunction &mf, 1894 bool tracksrp, 1895 bool srcorder, 1896 const TargetInstrInfo *tii, 1897 const TargetRegisterInfo *tri, 1898 const TargetLowering *tli) 1899 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, 1900 tii, tri, tli), 1901 Picker(this) {} 1902 1903 bool isBottomUp() const override { return SF::IsBottomUp; } 1904 1905 bool isReady(SUnit *U) const override { 1906 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); 1907 } 1908 1909 SUnit *pop() override { 1910 if (Queue.empty()) return nullptr; 1911 1912 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); 1913 V->NodeQueueId = 0; 1914 return V; 1915 } 1916 1917 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1918 LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override { 1919 // Emulate pop() without clobbering NodeQueueIds. 1920 std::vector<SUnit *> DumpQueue = Queue; 1921 SF DumpPicker = Picker; 1922 while (!DumpQueue.empty()) { 1923 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); 1924 dbgs() << "Height " << SU->getHeight() << ": "; 1925 DAG->dumpNode(*SU); 1926 } 1927 } 1928 #endif 1929 }; 1930 1931 using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>; 1932 using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>; 1933 using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>; 1934 using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>; 1935 1936 } // end anonymous namespace 1937 1938 //===----------------------------------------------------------------------===// 1939 // Static Node Priority for Register Pressure Reduction 1940 //===----------------------------------------------------------------------===// 1941 1942 // Check for special nodes that bypass scheduling heuristics. 1943 // Currently this pushes TokenFactor nodes down, but may be used for other 1944 // pseudo-ops as well. 1945 // 1946 // Return -1 to schedule right above left, 1 for left above right. 1947 // Return 0 if no bias exists. 1948 static int checkSpecialNodes(const SUnit *left, const SUnit *right) { 1949 bool LSchedLow = left->isScheduleLow; 1950 bool RSchedLow = right->isScheduleLow; 1951 if (LSchedLow != RSchedLow) 1952 return LSchedLow < RSchedLow ? 1 : -1; 1953 return 0; 1954 } 1955 1956 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 1957 /// Smaller number is the higher priority. 1958 static unsigned 1959 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 1960 if (SUNumbers[SU->NodeNum] != 0) 1961 return SUNumbers[SU->NodeNum]; 1962 1963 // Use WorkList to avoid stack overflow on excessively large IRs. 1964 struct WorkState { 1965 WorkState(const SUnit *SU) : SU(SU) {} 1966 const SUnit *SU; 1967 unsigned PredsProcessed = 0; 1968 }; 1969 1970 SmallVector<WorkState, 16> WorkList; 1971 WorkList.push_back(SU); 1972 while (!WorkList.empty()) { 1973 auto &Temp = WorkList.back(); 1974 auto *TempSU = Temp.SU; 1975 bool AllPredsKnown = true; 1976 // Try to find a non-evaluated pred and push it into the processing stack. 1977 for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) { 1978 auto &Pred = TempSU->Preds[P]; 1979 if (Pred.isCtrl()) continue; // ignore chain preds 1980 SUnit *PredSU = Pred.getSUnit(); 1981 if (SUNumbers[PredSU->NodeNum] == 0) { 1982 #ifndef NDEBUG 1983 // In debug mode, check that we don't have such element in the stack. 1984 for (auto It : WorkList) 1985 assert(It.SU != PredSU && "Trying to push an element twice?"); 1986 #endif 1987 // Next time start processing this one starting from the next pred. 1988 Temp.PredsProcessed = P + 1; 1989 WorkList.push_back(PredSU); 1990 AllPredsKnown = false; 1991 break; 1992 } 1993 } 1994 1995 if (!AllPredsKnown) 1996 continue; 1997 1998 // Once all preds are known, we can calculate the answer for this one. 1999 unsigned SethiUllmanNumber = 0; 2000 unsigned Extra = 0; 2001 for (const SDep &Pred : TempSU->Preds) { 2002 if (Pred.isCtrl()) continue; // ignore chain preds 2003 SUnit *PredSU = Pred.getSUnit(); 2004 unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum]; 2005 assert(PredSethiUllman > 0 && "We should have evaluated this pred!"); 2006 if (PredSethiUllman > SethiUllmanNumber) { 2007 SethiUllmanNumber = PredSethiUllman; 2008 Extra = 0; 2009 } else if (PredSethiUllman == SethiUllmanNumber) 2010 ++Extra; 2011 } 2012 2013 SethiUllmanNumber += Extra; 2014 if (SethiUllmanNumber == 0) 2015 SethiUllmanNumber = 1; 2016 SUNumbers[TempSU->NodeNum] = SethiUllmanNumber; 2017 WorkList.pop_back(); 2018 } 2019 2020 assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!"); 2021 return SUNumbers[SU->NodeNum]; 2022 } 2023 2024 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 2025 /// scheduling units. 2026 void RegReductionPQBase::CalculateSethiUllmanNumbers() { 2027 SethiUllmanNumbers.assign(SUnits->size(), 0); 2028 2029 for (const SUnit &SU : *SUnits) 2030 CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers); 2031 } 2032 2033 void RegReductionPQBase::addNode(const SUnit *SU) { 2034 unsigned SUSize = SethiUllmanNumbers.size(); 2035 if (SUnits->size() > SUSize) 2036 SethiUllmanNumbers.resize(SUSize*2, 0); 2037 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 2038 } 2039 2040 void RegReductionPQBase::updateNode(const SUnit *SU) { 2041 SethiUllmanNumbers[SU->NodeNum] = 0; 2042 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 2043 } 2044 2045 // Lower priority means schedule further down. For bottom-up scheduling, lower 2046 // priority SUs are scheduled before higher priority SUs. 2047 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { 2048 assert(SU->NodeNum < SethiUllmanNumbers.size()); 2049 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 2050 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 2051 // CopyToReg should be close to its uses to facilitate coalescing and 2052 // avoid spilling. 2053 return 0; 2054 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2055 Opc == TargetOpcode::SUBREG_TO_REG || 2056 Opc == TargetOpcode::INSERT_SUBREG) 2057 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 2058 // close to their uses to facilitate coalescing. 2059 return 0; 2060 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 2061 // If SU does not have a register use, i.e. it doesn't produce a value 2062 // that would be consumed (e.g. store), then it terminates a chain of 2063 // computation. Give it a large SethiUllman number so it will be 2064 // scheduled right before its predecessors that it doesn't lengthen 2065 // their live ranges. 2066 return 0xffff; 2067 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 2068 // If SU does not have a register def, schedule it close to its uses 2069 // because it does not lengthen any live ranges. 2070 return 0; 2071 #if 1 2072 return SethiUllmanNumbers[SU->NodeNum]; 2073 #else 2074 unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; 2075 if (SU->isCallOp) { 2076 // FIXME: This assumes all of the defs are used as call operands. 2077 int NP = (int)Priority - SU->getNode()->getNumValues(); 2078 return (NP > 0) ? NP : 0; 2079 } 2080 return Priority; 2081 #endif 2082 } 2083 2084 //===----------------------------------------------------------------------===// 2085 // Register Pressure Tracking 2086 //===----------------------------------------------------------------------===// 2087 2088 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2089 LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const { 2090 for (const TargetRegisterClass *RC : TRI->regclasses()) { 2091 unsigned Id = RC->getID(); 2092 unsigned RP = RegPressure[Id]; 2093 if (!RP) continue; 2094 LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " 2095 << RegLimit[Id] << '\n'); 2096 } 2097 } 2098 #endif 2099 2100 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { 2101 if (!TLI) 2102 return false; 2103 2104 for (const SDep &Pred : SU->Preds) { 2105 if (Pred.isCtrl()) 2106 continue; 2107 SUnit *PredSU = Pred.getSUnit(); 2108 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 2109 // to cover the number of registers defined (they are all live). 2110 if (PredSU->NumRegDefsLeft == 0) { 2111 continue; 2112 } 2113 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 2114 RegDefPos.IsValid(); RegDefPos.Advance()) { 2115 unsigned RCId, Cost; 2116 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 2117 2118 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) 2119 return true; 2120 } 2121 } 2122 return false; 2123 } 2124 2125 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { 2126 const SDNode *N = SU->getNode(); 2127 2128 if (!N->isMachineOpcode() || !SU->NumSuccs) 2129 return false; 2130 2131 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2132 for (unsigned i = 0; i != NumDefs; ++i) { 2133 MVT VT = N->getSimpleValueType(i); 2134 if (!N->hasAnyUseOfValue(i)) 2135 continue; 2136 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2137 if (RegPressure[RCId] >= RegLimit[RCId]) 2138 return true; 2139 } 2140 return false; 2141 } 2142 2143 // Compute the register pressure contribution by this instruction by count up 2144 // for uses that are not live and down for defs. Only count register classes 2145 // that are already under high pressure. As a side effect, compute the number of 2146 // uses of registers that are already live. 2147 // 2148 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure 2149 // so could probably be factored. 2150 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { 2151 LiveUses = 0; 2152 int PDiff = 0; 2153 for (const SDep &Pred : SU->Preds) { 2154 if (Pred.isCtrl()) 2155 continue; 2156 SUnit *PredSU = Pred.getSUnit(); 2157 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 2158 // to cover the number of registers defined (they are all live). 2159 if (PredSU->NumRegDefsLeft == 0) { 2160 if (PredSU->getNode()->isMachineOpcode()) 2161 ++LiveUses; 2162 continue; 2163 } 2164 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 2165 RegDefPos.IsValid(); RegDefPos.Advance()) { 2166 MVT VT = RegDefPos.GetValue(); 2167 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2168 if (RegPressure[RCId] >= RegLimit[RCId]) 2169 ++PDiff; 2170 } 2171 } 2172 const SDNode *N = SU->getNode(); 2173 2174 if (!N || !N->isMachineOpcode() || !SU->NumSuccs) 2175 return PDiff; 2176 2177 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2178 for (unsigned i = 0; i != NumDefs; ++i) { 2179 MVT VT = N->getSimpleValueType(i); 2180 if (!N->hasAnyUseOfValue(i)) 2181 continue; 2182 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2183 if (RegPressure[RCId] >= RegLimit[RCId]) 2184 --PDiff; 2185 } 2186 return PDiff; 2187 } 2188 2189 void RegReductionPQBase::scheduledNode(SUnit *SU) { 2190 if (!TracksRegPressure) 2191 return; 2192 2193 if (!SU->getNode()) 2194 return; 2195 2196 for (const SDep &Pred : SU->Preds) { 2197 if (Pred.isCtrl()) 2198 continue; 2199 SUnit *PredSU = Pred.getSUnit(); 2200 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 2201 // to cover the number of registers defined (they are all live). 2202 if (PredSU->NumRegDefsLeft == 0) { 2203 continue; 2204 } 2205 // FIXME: The ScheduleDAG currently loses information about which of a 2206 // node's values is consumed by each dependence. Consequently, if the node 2207 // defines multiple register classes, we don't know which to pressurize 2208 // here. Instead the following loop consumes the register defs in an 2209 // arbitrary order. At least it handles the common case of clustered loads 2210 // to the same class. For precise liveness, each SDep needs to indicate the 2211 // result number. But that tightly couples the ScheduleDAG with the 2212 // SelectionDAG making updates tricky. A simpler hack would be to attach a 2213 // value type or register class to SDep. 2214 // 2215 // The most important aspect of register tracking is balancing the increase 2216 // here with the reduction further below. Note that this SU may use multiple 2217 // defs in PredSU. The can't be determined here, but we've already 2218 // compensated by reducing NumRegDefsLeft in PredSU during 2219 // ScheduleDAGSDNodes::AddSchedEdges. 2220 --PredSU->NumRegDefsLeft; 2221 unsigned SkipRegDefs = PredSU->NumRegDefsLeft; 2222 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 2223 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 2224 if (SkipRegDefs) 2225 continue; 2226 2227 unsigned RCId, Cost; 2228 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 2229 RegPressure[RCId] += Cost; 2230 break; 2231 } 2232 } 2233 2234 // We should have this assert, but there may be dead SDNodes that never 2235 // materialize as SUnits, so they don't appear to generate liveness. 2236 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); 2237 int SkipRegDefs = (int)SU->NumRegDefsLeft; 2238 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); 2239 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 2240 if (SkipRegDefs > 0) 2241 continue; 2242 unsigned RCId, Cost; 2243 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 2244 if (RegPressure[RCId] < Cost) { 2245 // Register pressure tracking is imprecise. This can happen. But we try 2246 // hard not to let it happen because it likely results in poor scheduling. 2247 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum 2248 << ") has too many regdefs\n"); 2249 RegPressure[RCId] = 0; 2250 } 2251 else { 2252 RegPressure[RCId] -= Cost; 2253 } 2254 } 2255 LLVM_DEBUG(dumpRegPressure()); 2256 } 2257 2258 void RegReductionPQBase::unscheduledNode(SUnit *SU) { 2259 if (!TracksRegPressure) 2260 return; 2261 2262 const SDNode *N = SU->getNode(); 2263 if (!N) return; 2264 2265 if (!N->isMachineOpcode()) { 2266 if (N->getOpcode() != ISD::CopyToReg) 2267 return; 2268 } else { 2269 unsigned Opc = N->getMachineOpcode(); 2270 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2271 Opc == TargetOpcode::INSERT_SUBREG || 2272 Opc == TargetOpcode::SUBREG_TO_REG || 2273 Opc == TargetOpcode::REG_SEQUENCE || 2274 Opc == TargetOpcode::IMPLICIT_DEF) 2275 return; 2276 } 2277 2278 for (const SDep &Pred : SU->Preds) { 2279 if (Pred.isCtrl()) 2280 continue; 2281 SUnit *PredSU = Pred.getSUnit(); 2282 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only 2283 // counts data deps. 2284 if (PredSU->NumSuccsLeft != PredSU->Succs.size()) 2285 continue; 2286 const SDNode *PN = PredSU->getNode(); 2287 if (!PN->isMachineOpcode()) { 2288 if (PN->getOpcode() == ISD::CopyFromReg) { 2289 MVT VT = PN->getSimpleValueType(0); 2290 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2291 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2292 } 2293 continue; 2294 } 2295 unsigned POpc = PN->getMachineOpcode(); 2296 if (POpc == TargetOpcode::IMPLICIT_DEF) 2297 continue; 2298 if (POpc == TargetOpcode::EXTRACT_SUBREG || 2299 POpc == TargetOpcode::INSERT_SUBREG || 2300 POpc == TargetOpcode::SUBREG_TO_REG) { 2301 MVT VT = PN->getSimpleValueType(0); 2302 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2303 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2304 continue; 2305 } 2306 if (POpc == TargetOpcode::REG_SEQUENCE) { 2307 unsigned DstRCIdx = 2308 cast<ConstantSDNode>(PN->getOperand(0))->getZExtValue(); 2309 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); 2310 unsigned RCId = RC->getID(); 2311 // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used 2312 // here. Instead use the same constant as in GetCostForDef. 2313 RegPressure[RCId] += RegSequenceCost; 2314 continue; 2315 } 2316 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); 2317 for (unsigned i = 0; i != NumDefs; ++i) { 2318 MVT VT = PN->getSimpleValueType(i); 2319 if (!PN->hasAnyUseOfValue(i)) 2320 continue; 2321 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2322 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) 2323 // Register pressure tracking is imprecise. This can happen. 2324 RegPressure[RCId] = 0; 2325 else 2326 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 2327 } 2328 } 2329 2330 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() 2331 // may transfer data dependencies to CopyToReg. 2332 if (SU->NumSuccs && N->isMachineOpcode()) { 2333 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2334 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 2335 MVT VT = N->getSimpleValueType(i); 2336 if (VT == MVT::Glue || VT == MVT::Other) 2337 continue; 2338 if (!N->hasAnyUseOfValue(i)) 2339 continue; 2340 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2341 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2342 } 2343 } 2344 2345 LLVM_DEBUG(dumpRegPressure()); 2346 } 2347 2348 //===----------------------------------------------------------------------===// 2349 // Dynamic Node Priority for Register Pressure Reduction 2350 //===----------------------------------------------------------------------===// 2351 2352 /// closestSucc - Returns the scheduled cycle of the successor which is 2353 /// closest to the current cycle. 2354 static unsigned closestSucc(const SUnit *SU) { 2355 unsigned MaxHeight = 0; 2356 for (const SDep &Succ : SU->Succs) { 2357 if (Succ.isCtrl()) continue; // ignore chain succs 2358 unsigned Height = Succ.getSUnit()->getHeight(); 2359 // If there are bunch of CopyToRegs stacked up, they should be considered 2360 // to be at the same position. 2361 if (Succ.getSUnit()->getNode() && 2362 Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 2363 Height = closestSucc(Succ.getSUnit())+1; 2364 if (Height > MaxHeight) 2365 MaxHeight = Height; 2366 } 2367 return MaxHeight; 2368 } 2369 2370 /// calcMaxScratches - Returns an cost estimate of the worse case requirement 2371 /// for scratch registers, i.e. number of data dependencies. 2372 static unsigned calcMaxScratches(const SUnit *SU) { 2373 unsigned Scratches = 0; 2374 for (const SDep &Pred : SU->Preds) { 2375 if (Pred.isCtrl()) continue; // ignore chain preds 2376 Scratches++; 2377 } 2378 return Scratches; 2379 } 2380 2381 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are 2382 /// CopyFromReg from a virtual register. 2383 static bool hasOnlyLiveInOpers(const SUnit *SU) { 2384 bool RetVal = false; 2385 for (const SDep &Pred : SU->Preds) { 2386 if (Pred.isCtrl()) continue; 2387 const SUnit *PredSU = Pred.getSUnit(); 2388 if (PredSU->getNode() && 2389 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { 2390 Register Reg = 2391 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); 2392 if (Reg.isVirtual()) { 2393 RetVal = true; 2394 continue; 2395 } 2396 } 2397 return false; 2398 } 2399 return RetVal; 2400 } 2401 2402 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are 2403 /// CopyToReg to a virtual register. This SU def is probably a liveout and 2404 /// it has no other use. It should be scheduled closer to the terminator. 2405 static bool hasOnlyLiveOutUses(const SUnit *SU) { 2406 bool RetVal = false; 2407 for (const SDep &Succ : SU->Succs) { 2408 if (Succ.isCtrl()) continue; 2409 const SUnit *SuccSU = Succ.getSUnit(); 2410 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { 2411 Register Reg = 2412 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); 2413 if (Reg.isVirtual()) { 2414 RetVal = true; 2415 continue; 2416 } 2417 } 2418 return false; 2419 } 2420 return RetVal; 2421 } 2422 2423 // Set isVRegCycle for a node with only live in opers and live out uses. Also 2424 // set isVRegCycle for its CopyFromReg operands. 2425 // 2426 // This is only relevant for single-block loops, in which case the VRegCycle 2427 // node is likely an induction variable in which the operand and target virtual 2428 // registers should be coalesced (e.g. pre/post increment values). Setting the 2429 // isVRegCycle flag helps the scheduler prioritize other uses of the same 2430 // CopyFromReg so that this node becomes the virtual register "kill". This 2431 // avoids interference between the values live in and out of the block and 2432 // eliminates a copy inside the loop. 2433 static void initVRegCycle(SUnit *SU) { 2434 if (DisableSchedVRegCycle) 2435 return; 2436 2437 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) 2438 return; 2439 2440 LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); 2441 2442 SU->isVRegCycle = true; 2443 2444 for (const SDep &Pred : SU->Preds) { 2445 if (Pred.isCtrl()) continue; 2446 Pred.getSUnit()->isVRegCycle = true; 2447 } 2448 } 2449 2450 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of 2451 // CopyFromReg operands. We should no longer penalize other uses of this VReg. 2452 static void resetVRegCycle(SUnit *SU) { 2453 if (!SU->isVRegCycle) 2454 return; 2455 2456 for (const SDep &Pred : SU->Preds) { 2457 if (Pred.isCtrl()) continue; // ignore chain preds 2458 SUnit *PredSU = Pred.getSUnit(); 2459 if (PredSU->isVRegCycle) { 2460 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && 2461 "VRegCycle def must be CopyFromReg"); 2462 Pred.getSUnit()->isVRegCycle = false; 2463 } 2464 } 2465 } 2466 2467 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This 2468 // means a node that defines the VRegCycle has not been scheduled yet. 2469 static bool hasVRegCycleUse(const SUnit *SU) { 2470 // If this SU also defines the VReg, don't hoist it as a "use". 2471 if (SU->isVRegCycle) 2472 return false; 2473 2474 for (const SDep &Pred : SU->Preds) { 2475 if (Pred.isCtrl()) continue; // ignore chain preds 2476 if (Pred.getSUnit()->isVRegCycle && 2477 Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { 2478 LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); 2479 return true; 2480 } 2481 } 2482 return false; 2483 } 2484 2485 // Check for either a dependence (latency) or resource (hazard) stall. 2486 // 2487 // Note: The ScheduleHazardRecognizer interface requires a non-const SU. 2488 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { 2489 if ((int)SPQ->getCurCycle() < Height) return true; 2490 if (SPQ->getHazardRec()->getHazardType(SU, 0) 2491 != ScheduleHazardRecognizer::NoHazard) 2492 return true; 2493 return false; 2494 } 2495 2496 // Return -1 if left has higher priority, 1 if right has higher priority. 2497 // Return 0 if latency-based priority is equivalent. 2498 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, 2499 RegReductionPQBase *SPQ) { 2500 // Scheduling an instruction that uses a VReg whose postincrement has not yet 2501 // been scheduled will induce a copy. Model this as an extra cycle of latency. 2502 int LPenalty = hasVRegCycleUse(left) ? 1 : 0; 2503 int RPenalty = hasVRegCycleUse(right) ? 1 : 0; 2504 int LHeight = (int)left->getHeight() + LPenalty; 2505 int RHeight = (int)right->getHeight() + RPenalty; 2506 2507 bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && 2508 BUHasStall(left, LHeight, SPQ); 2509 bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && 2510 BUHasStall(right, RHeight, SPQ); 2511 2512 // If scheduling one of the node will cause a pipeline stall, delay it. 2513 // If scheduling either one of the node will cause a pipeline stall, sort 2514 // them according to their height. 2515 if (LStall) { 2516 if (!RStall) 2517 return 1; 2518 if (LHeight != RHeight) 2519 return LHeight > RHeight ? 1 : -1; 2520 } else if (RStall) 2521 return -1; 2522 2523 // If either node is scheduling for latency, sort them by height/depth 2524 // and latency. 2525 if (!checkPref || (left->SchedulingPref == Sched::ILP || 2526 right->SchedulingPref == Sched::ILP)) { 2527 // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer 2528 // is enabled, grouping instructions by cycle, then its height is already 2529 // covered so only its depth matters. We also reach this point if both stall 2530 // but have the same height. 2531 if (!SPQ->getHazardRec()->isEnabled()) { 2532 if (LHeight != RHeight) 2533 return LHeight > RHeight ? 1 : -1; 2534 } 2535 int LDepth = left->getDepth() - LPenalty; 2536 int RDepth = right->getDepth() - RPenalty; 2537 if (LDepth != RDepth) { 2538 LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum 2539 << ") depth " << LDepth << " vs SU (" << right->NodeNum 2540 << ") depth " << RDepth << "\n"); 2541 return LDepth < RDepth ? 1 : -1; 2542 } 2543 if (left->Latency != right->Latency) 2544 return left->Latency > right->Latency ? 1 : -1; 2545 } 2546 return 0; 2547 } 2548 2549 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { 2550 // Schedule physical register definitions close to their use. This is 2551 // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as 2552 // long as shortening physreg live ranges is generally good, we can defer 2553 // creating a subtarget hook. 2554 if (!DisableSchedPhysRegJoin) { 2555 bool LHasPhysReg = left->hasPhysRegDefs; 2556 bool RHasPhysReg = right->hasPhysRegDefs; 2557 if (LHasPhysReg != RHasPhysReg) { 2558 #ifndef NDEBUG 2559 static const char *const PhysRegMsg[] = { " has no physreg", 2560 " defines a physreg" }; 2561 #endif 2562 LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") " 2563 << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum 2564 << ") " << PhysRegMsg[RHasPhysReg] << "\n"); 2565 return LHasPhysReg < RHasPhysReg; 2566 } 2567 } 2568 2569 // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. 2570 unsigned LPriority = SPQ->getNodePriority(left); 2571 unsigned RPriority = SPQ->getNodePriority(right); 2572 2573 // Be really careful about hoisting call operands above previous calls. 2574 // Only allows it if it would reduce register pressure. 2575 if (left->isCall && right->isCallOp) { 2576 unsigned RNumVals = right->getNode()->getNumValues(); 2577 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0; 2578 } 2579 if (right->isCall && left->isCallOp) { 2580 unsigned LNumVals = left->getNode()->getNumValues(); 2581 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; 2582 } 2583 2584 if (LPriority != RPriority) 2585 return LPriority > RPriority; 2586 2587 // One or both of the nodes are calls and their sethi-ullman numbers are the 2588 // same, then keep source order. 2589 if (left->isCall || right->isCall) { 2590 unsigned LOrder = SPQ->getNodeOrdering(left); 2591 unsigned ROrder = SPQ->getNodeOrdering(right); 2592 2593 // Prefer an ordering where the lower the non-zero order number, the higher 2594 // the preference. 2595 if ((LOrder || ROrder) && LOrder != ROrder) 2596 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 2597 } 2598 2599 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 2600 // e.g. 2601 // t1 = op t2, c1 2602 // t3 = op t4, c2 2603 // 2604 // and the following instructions are both ready. 2605 // t2 = op c3 2606 // t4 = op c4 2607 // 2608 // Then schedule t2 = op first. 2609 // i.e. 2610 // t4 = op c4 2611 // t2 = op c3 2612 // t1 = op t2, c1 2613 // t3 = op t4, c2 2614 // 2615 // This creates more short live intervals. 2616 unsigned LDist = closestSucc(left); 2617 unsigned RDist = closestSucc(right); 2618 if (LDist != RDist) 2619 return LDist < RDist; 2620 2621 // How many registers becomes live when the node is scheduled. 2622 unsigned LScratch = calcMaxScratches(left); 2623 unsigned RScratch = calcMaxScratches(right); 2624 if (LScratch != RScratch) 2625 return LScratch > RScratch; 2626 2627 // Comparing latency against a call makes little sense unless the node 2628 // is register pressure-neutral. 2629 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) 2630 return (left->NodeQueueId > right->NodeQueueId); 2631 2632 // Do not compare latencies when one or both of the nodes are calls. 2633 if (!DisableSchedCycles && 2634 !(left->isCall || right->isCall)) { 2635 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); 2636 if (result != 0) 2637 return result > 0; 2638 } 2639 else { 2640 if (left->getHeight() != right->getHeight()) 2641 return left->getHeight() > right->getHeight(); 2642 2643 if (left->getDepth() != right->getDepth()) 2644 return left->getDepth() < right->getDepth(); 2645 } 2646 2647 assert(left->NodeQueueId && right->NodeQueueId && 2648 "NodeQueueId cannot be zero"); 2649 return (left->NodeQueueId > right->NodeQueueId); 2650 } 2651 2652 // Bottom up 2653 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2654 if (int res = checkSpecialNodes(left, right)) 2655 return res > 0; 2656 2657 return BURRSort(left, right, SPQ); 2658 } 2659 2660 // Source order, otherwise bottom up. 2661 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2662 if (int res = checkSpecialNodes(left, right)) 2663 return res > 0; 2664 2665 unsigned LOrder = SPQ->getNodeOrdering(left); 2666 unsigned ROrder = SPQ->getNodeOrdering(right); 2667 2668 // Prefer an ordering where the lower the non-zero order number, the higher 2669 // the preference. 2670 if ((LOrder || ROrder) && LOrder != ROrder) 2671 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 2672 2673 return BURRSort(left, right, SPQ); 2674 } 2675 2676 // If the time between now and when the instruction will be ready can cover 2677 // the spill code, then avoid adding it to the ready queue. This gives long 2678 // stalls highest priority and allows hoisting across calls. It should also 2679 // speed up processing the available queue. 2680 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2681 static const unsigned ReadyDelay = 3; 2682 2683 if (SPQ->MayReduceRegPressure(SU)) return true; 2684 2685 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; 2686 2687 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) 2688 != ScheduleHazardRecognizer::NoHazard) 2689 return false; 2690 2691 return true; 2692 } 2693 2694 // Return true if right should be scheduled with higher priority than left. 2695 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2696 if (int res = checkSpecialNodes(left, right)) 2697 return res > 0; 2698 2699 if (left->isCall || right->isCall) 2700 // No way to compute latency of calls. 2701 return BURRSort(left, right, SPQ); 2702 2703 bool LHigh = SPQ->HighRegPressure(left); 2704 bool RHigh = SPQ->HighRegPressure(right); 2705 // Avoid causing spills. If register pressure is high, schedule for 2706 // register pressure reduction. 2707 if (LHigh && !RHigh) { 2708 LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" 2709 << right->NodeNum << ")\n"); 2710 return true; 2711 } 2712 else if (!LHigh && RHigh) { 2713 LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" 2714 << left->NodeNum << ")\n"); 2715 return false; 2716 } 2717 if (!LHigh && !RHigh) { 2718 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); 2719 if (result != 0) 2720 return result > 0; 2721 } 2722 return BURRSort(left, right, SPQ); 2723 } 2724 2725 // Schedule as many instructions in each cycle as possible. So don't make an 2726 // instruction available unless it is ready in the current cycle. 2727 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2728 if (SU->getHeight() > CurCycle) return false; 2729 2730 if (SPQ->getHazardRec()->getHazardType(SU, 0) 2731 != ScheduleHazardRecognizer::NoHazard) 2732 return false; 2733 2734 return true; 2735 } 2736 2737 static bool canEnableCoalescing(SUnit *SU) { 2738 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 2739 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 2740 // CopyToReg should be close to its uses to facilitate coalescing and 2741 // avoid spilling. 2742 return true; 2743 2744 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2745 Opc == TargetOpcode::SUBREG_TO_REG || 2746 Opc == TargetOpcode::INSERT_SUBREG) 2747 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 2748 // close to their uses to facilitate coalescing. 2749 return true; 2750 2751 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 2752 // If SU does not have a register def, schedule it close to its uses 2753 // because it does not lengthen any live ranges. 2754 return true; 2755 2756 return false; 2757 } 2758 2759 // list-ilp is currently an experimental scheduler that allows various 2760 // heuristics to be enabled prior to the normal register reduction logic. 2761 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2762 if (int res = checkSpecialNodes(left, right)) 2763 return res > 0; 2764 2765 if (left->isCall || right->isCall) 2766 // No way to compute latency of calls. 2767 return BURRSort(left, right, SPQ); 2768 2769 unsigned LLiveUses = 0, RLiveUses = 0; 2770 int LPDiff = 0, RPDiff = 0; 2771 if (!DisableSchedRegPressure || !DisableSchedLiveUses) { 2772 LPDiff = SPQ->RegPressureDiff(left, LLiveUses); 2773 RPDiff = SPQ->RegPressureDiff(right, RLiveUses); 2774 } 2775 if (!DisableSchedRegPressure && LPDiff != RPDiff) { 2776 LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum 2777 << "): " << LPDiff << " != SU(" << right->NodeNum 2778 << "): " << RPDiff << "\n"); 2779 return LPDiff > RPDiff; 2780 } 2781 2782 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { 2783 bool LReduce = canEnableCoalescing(left); 2784 bool RReduce = canEnableCoalescing(right); 2785 if (LReduce && !RReduce) return false; 2786 if (RReduce && !LReduce) return true; 2787 } 2788 2789 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { 2790 LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses 2791 << " != SU(" << right->NodeNum << "): " << RLiveUses 2792 << "\n"); 2793 return LLiveUses < RLiveUses; 2794 } 2795 2796 if (!DisableSchedStalls) { 2797 bool LStall = BUHasStall(left, left->getHeight(), SPQ); 2798 bool RStall = BUHasStall(right, right->getHeight(), SPQ); 2799 if (LStall != RStall) 2800 return left->getHeight() > right->getHeight(); 2801 } 2802 2803 if (!DisableSchedCriticalPath) { 2804 int spread = (int)left->getDepth() - (int)right->getDepth(); 2805 if (std::abs(spread) > MaxReorderWindow) { 2806 LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " 2807 << left->getDepth() << " != SU(" << right->NodeNum 2808 << "): " << right->getDepth() << "\n"); 2809 return left->getDepth() < right->getDepth(); 2810 } 2811 } 2812 2813 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { 2814 int spread = (int)left->getHeight() - (int)right->getHeight(); 2815 if (std::abs(spread) > MaxReorderWindow) 2816 return left->getHeight() > right->getHeight(); 2817 } 2818 2819 return BURRSort(left, right, SPQ); 2820 } 2821 2822 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { 2823 SUnits = &sunits; 2824 // Add pseudo dependency edges for two-address nodes. 2825 if (!Disable2AddrHack) 2826 AddPseudoTwoAddrDeps(); 2827 // Reroute edges to nodes with multiple uses. 2828 if (!TracksRegPressure && !SrcOrder) 2829 PrescheduleNodesWithMultipleUses(); 2830 // Calculate node priorities. 2831 CalculateSethiUllmanNumbers(); 2832 2833 // For single block loops, mark nodes that look like canonical IV increments. 2834 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) 2835 for (SUnit &SU : sunits) 2836 initVRegCycle(&SU); 2837 } 2838 2839 //===----------------------------------------------------------------------===// 2840 // Preschedule for Register Pressure 2841 //===----------------------------------------------------------------------===// 2842 2843 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { 2844 if (SU->isTwoAddress) { 2845 unsigned Opc = SU->getNode()->getMachineOpcode(); 2846 const MCInstrDesc &MCID = TII->get(Opc); 2847 unsigned NumRes = MCID.getNumDefs(); 2848 unsigned NumOps = MCID.getNumOperands() - NumRes; 2849 for (unsigned i = 0; i != NumOps; ++i) { 2850 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { 2851 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 2852 if (DU->getNodeId() != -1 && 2853 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 2854 return true; 2855 } 2856 } 2857 } 2858 return false; 2859 } 2860 2861 /// canClobberReachingPhysRegUse - True if SU would clobber one of it's 2862 /// successor's explicit physregs whose definition can reach DepSU. 2863 /// i.e. DepSU should not be scheduled above SU. 2864 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, 2865 ScheduleDAGRRList *scheduleDAG, 2866 const TargetInstrInfo *TII, 2867 const TargetRegisterInfo *TRI) { 2868 ArrayRef<MCPhysReg> ImpDefs = 2869 TII->get(SU->getNode()->getMachineOpcode()).implicit_defs(); 2870 const uint32_t *RegMask = getNodeRegMask(SU->getNode()); 2871 if (ImpDefs.empty() && !RegMask) 2872 return false; 2873 2874 for (const SDep &Succ : SU->Succs) { 2875 SUnit *SuccSU = Succ.getSUnit(); 2876 for (const SDep &SuccPred : SuccSU->Preds) { 2877 if (!SuccPred.isAssignedRegDep()) 2878 continue; 2879 2880 if (RegMask && 2881 MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) && 2882 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) 2883 return true; 2884 2885 for (MCPhysReg ImpDef : ImpDefs) { 2886 // Return true if SU clobbers this physical register use and the 2887 // definition of the register reaches from DepSU. IsReachable queries 2888 // a topological forward sort of the DAG (following the successors). 2889 if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) && 2890 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) 2891 return true; 2892 } 2893 } 2894 } 2895 return false; 2896 } 2897 2898 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 2899 /// physical register defs. 2900 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 2901 const TargetInstrInfo *TII, 2902 const TargetRegisterInfo *TRI) { 2903 SDNode *N = SuccSU->getNode(); 2904 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2905 ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs(); 2906 assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs"); 2907 for (const SDNode *SUNode = SU->getNode(); SUNode; 2908 SUNode = SUNode->getGluedNode()) { 2909 if (!SUNode->isMachineOpcode()) 2910 continue; 2911 ArrayRef<MCPhysReg> SUImpDefs = 2912 TII->get(SUNode->getMachineOpcode()).implicit_defs(); 2913 const uint32_t *SURegMask = getNodeRegMask(SUNode); 2914 if (SUImpDefs.empty() && !SURegMask) 2915 continue; 2916 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 2917 MVT VT = N->getSimpleValueType(i); 2918 if (VT == MVT::Glue || VT == MVT::Other) 2919 continue; 2920 if (!N->hasAnyUseOfValue(i)) 2921 continue; 2922 MCPhysReg Reg = ImpDefs[i - NumDefs]; 2923 if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) 2924 return true; 2925 for (MCPhysReg SUReg : SUImpDefs) { 2926 if (TRI->regsOverlap(Reg, SUReg)) 2927 return true; 2928 } 2929 } 2930 } 2931 return false; 2932 } 2933 2934 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 2935 /// are not handled well by the general register pressure reduction 2936 /// heuristics. When presented with code like this: 2937 /// 2938 /// N 2939 /// / | 2940 /// / | 2941 /// U store 2942 /// | 2943 /// ... 2944 /// 2945 /// the heuristics tend to push the store up, but since the 2946 /// operand of the store has another use (U), this would increase 2947 /// the length of that other use (the U->N edge). 2948 /// 2949 /// This function transforms code like the above to route U's 2950 /// dependence through the store when possible, like this: 2951 /// 2952 /// N 2953 /// || 2954 /// || 2955 /// store 2956 /// | 2957 /// U 2958 /// | 2959 /// ... 2960 /// 2961 /// This results in the store being scheduled immediately 2962 /// after N, which shortens the U->N live range, reducing 2963 /// register pressure. 2964 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { 2965 // Visit all the nodes in topological order, working top-down. 2966 for (SUnit &SU : *SUnits) { 2967 // For now, only look at nodes with no data successors, such as stores. 2968 // These are especially important, due to the heuristics in 2969 // getNodePriority for nodes with no data successors. 2970 if (SU.NumSuccs != 0) 2971 continue; 2972 // For now, only look at nodes with exactly one data predecessor. 2973 if (SU.NumPreds != 1) 2974 continue; 2975 // Avoid prescheduling copies to virtual registers, which don't behave 2976 // like other nodes from the perspective of scheduling heuristics. 2977 if (SDNode *N = SU.getNode()) 2978 if (N->getOpcode() == ISD::CopyToReg && 2979 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual()) 2980 continue; 2981 2982 SDNode *PredFrameSetup = nullptr; 2983 for (const SDep &Pred : SU.Preds) 2984 if (Pred.isCtrl() && Pred.getSUnit()) { 2985 // Find the predecessor which is not data dependence. 2986 SDNode *PredND = Pred.getSUnit()->getNode(); 2987 2988 // If PredND is FrameSetup, we should not pre-scheduled the node, 2989 // or else, when bottom up scheduling, ADJCALLSTACKDOWN and 2990 // ADJCALLSTACKUP may hold CallResource too long and make other 2991 // calls can't be scheduled. If there's no other available node 2992 // to schedule, the schedular will try to rename the register by 2993 // creating copy to avoid the conflict which will fail because 2994 // CallResource is not a real physical register. 2995 if (PredND && PredND->isMachineOpcode() && 2996 (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) { 2997 PredFrameSetup = PredND; 2998 break; 2999 } 3000 } 3001 // Skip the node has FrameSetup parent. 3002 if (PredFrameSetup != nullptr) 3003 continue; 3004 3005 // Locate the single data predecessor. 3006 SUnit *PredSU = nullptr; 3007 for (const SDep &Pred : SU.Preds) 3008 if (!Pred.isCtrl()) { 3009 PredSU = Pred.getSUnit(); 3010 break; 3011 } 3012 assert(PredSU); 3013 3014 // Don't rewrite edges that carry physregs, because that requires additional 3015 // support infrastructure. 3016 if (PredSU->hasPhysRegDefs) 3017 continue; 3018 // Short-circuit the case where SU is PredSU's only data successor. 3019 if (PredSU->NumSuccs == 1) 3020 continue; 3021 // Avoid prescheduling to copies from virtual registers, which don't behave 3022 // like other nodes from the perspective of scheduling heuristics. 3023 if (SDNode *N = SU.getNode()) 3024 if (N->getOpcode() == ISD::CopyFromReg && 3025 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual()) 3026 continue; 3027 3028 // Perform checks on the successors of PredSU. 3029 for (const SDep &PredSucc : PredSU->Succs) { 3030 SUnit *PredSuccSU = PredSucc.getSUnit(); 3031 if (PredSuccSU == &SU) continue; 3032 // If PredSU has another successor with no data successors, for 3033 // now don't attempt to choose either over the other. 3034 if (PredSuccSU->NumSuccs == 0) 3035 goto outer_loop_continue; 3036 // Don't break physical register dependencies. 3037 if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) 3038 if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI)) 3039 goto outer_loop_continue; 3040 // Don't introduce graph cycles. 3041 if (scheduleDAG->IsReachable(&SU, PredSuccSU)) 3042 goto outer_loop_continue; 3043 } 3044 3045 // Ok, the transformation is safe and the heuristics suggest it is 3046 // profitable. Update the graph. 3047 LLVM_DEBUG( 3048 dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #" 3049 << PredSU->NodeNum 3050 << " to guide scheduling in the presence of multiple uses\n"); 3051 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { 3052 SDep Edge = PredSU->Succs[i]; 3053 assert(!Edge.isAssignedRegDep()); 3054 SUnit *SuccSU = Edge.getSUnit(); 3055 if (SuccSU != &SU) { 3056 Edge.setSUnit(PredSU); 3057 scheduleDAG->RemovePred(SuccSU, Edge); 3058 scheduleDAG->AddPredQueued(&SU, Edge); 3059 Edge.setSUnit(&SU); 3060 scheduleDAG->AddPredQueued(SuccSU, Edge); 3061 --i; 3062 } 3063 } 3064 outer_loop_continue:; 3065 } 3066 } 3067 3068 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 3069 /// it as a def&use operand. Add a pseudo control edge from it to the other 3070 /// node (if it won't create a cycle) so the two-address one will be scheduled 3071 /// first (lower in the schedule). If both nodes are two-address, favor the 3072 /// one that has a CopyToReg use (more likely to be a loop induction update). 3073 /// If both are two-address, but one is commutable while the other is not 3074 /// commutable, favor the one that's not commutable. 3075 void RegReductionPQBase::AddPseudoTwoAddrDeps() { 3076 for (SUnit &SU : *SUnits) { 3077 if (!SU.isTwoAddress) 3078 continue; 3079 3080 SDNode *Node = SU.getNode(); 3081 if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode()) 3082 continue; 3083 3084 bool isLiveOut = hasOnlyLiveOutUses(&SU); 3085 unsigned Opc = Node->getMachineOpcode(); 3086 const MCInstrDesc &MCID = TII->get(Opc); 3087 unsigned NumRes = MCID.getNumDefs(); 3088 unsigned NumOps = MCID.getNumOperands() - NumRes; 3089 for (unsigned j = 0; j != NumOps; ++j) { 3090 if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1) 3091 continue; 3092 SDNode *DU = SU.getNode()->getOperand(j).getNode(); 3093 if (DU->getNodeId() == -1) 3094 continue; 3095 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 3096 if (!DUSU) 3097 continue; 3098 for (const SDep &Succ : DUSU->Succs) { 3099 if (Succ.isCtrl()) 3100 continue; 3101 SUnit *SuccSU = Succ.getSUnit(); 3102 if (SuccSU == &SU) 3103 continue; 3104 // Be conservative. Ignore if nodes aren't at roughly the same 3105 // depth and height. 3106 if (SuccSU->getHeight() < SU.getHeight() && 3107 (SU.getHeight() - SuccSU->getHeight()) > 1) 3108 continue; 3109 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge 3110 // constrains whatever is using the copy, instead of the copy 3111 // itself. In the case that the copy is coalesced, this 3112 // preserves the intent of the pseudo two-address heurietics. 3113 while (SuccSU->Succs.size() == 1 && 3114 SuccSU->getNode()->isMachineOpcode() && 3115 SuccSU->getNode()->getMachineOpcode() == 3116 TargetOpcode::COPY_TO_REGCLASS) 3117 SuccSU = SuccSU->Succs.front().getSUnit(); 3118 // Don't constrain non-instruction nodes. 3119 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 3120 continue; 3121 // Don't constrain nodes with physical register defs if the 3122 // predecessor can clobber them. 3123 if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) { 3124 if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI)) 3125 continue; 3126 } 3127 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; 3128 // these may be coalesced away. We want them close to their uses. 3129 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 3130 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || 3131 SuccOpc == TargetOpcode::INSERT_SUBREG || 3132 SuccOpc == TargetOpcode::SUBREG_TO_REG) 3133 continue; 3134 if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) && 3135 (!canClobber(SuccSU, DUSU) || 3136 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || 3137 (!SU.isCommutable && SuccSU->isCommutable)) && 3138 !scheduleDAG->IsReachable(SuccSU, &SU)) { 3139 LLVM_DEBUG(dbgs() 3140 << " Adding a pseudo-two-addr edge from SU #" 3141 << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); 3142 scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial)); 3143 } 3144 } 3145 } 3146 } 3147 } 3148 3149 //===----------------------------------------------------------------------===// 3150 // Public Constructor Functions 3151 //===----------------------------------------------------------------------===// 3152 3153 ScheduleDAGSDNodes * 3154 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 3155 CodeGenOpt::Level OptLevel) { 3156 const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); 3157 const TargetInstrInfo *TII = STI.getInstrInfo(); 3158 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 3159 3160 BURegReductionPriorityQueue *PQ = 3161 new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr); 3162 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 3163 PQ->setScheduleDAG(SD); 3164 return SD; 3165 } 3166 3167 ScheduleDAGSDNodes * 3168 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, 3169 CodeGenOpt::Level OptLevel) { 3170 const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); 3171 const TargetInstrInfo *TII = STI.getInstrInfo(); 3172 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 3173 3174 SrcRegReductionPriorityQueue *PQ = 3175 new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr); 3176 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 3177 PQ->setScheduleDAG(SD); 3178 return SD; 3179 } 3180 3181 ScheduleDAGSDNodes * 3182 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, 3183 CodeGenOpt::Level OptLevel) { 3184 const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); 3185 const TargetInstrInfo *TII = STI.getInstrInfo(); 3186 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 3187 const TargetLowering *TLI = IS->TLI; 3188 3189 HybridBURRPriorityQueue *PQ = 3190 new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); 3191 3192 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 3193 PQ->setScheduleDAG(SD); 3194 return SD; 3195 } 3196 3197 ScheduleDAGSDNodes * 3198 llvm::createILPListDAGScheduler(SelectionDAGISel *IS, 3199 CodeGenOpt::Level OptLevel) { 3200 const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); 3201 const TargetInstrInfo *TII = STI.getInstrInfo(); 3202 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 3203 const TargetLowering *TLI = IS->TLI; 3204 3205 ILPBURRPriorityQueue *PQ = 3206 new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); 3207 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 3208 PQ->setScheduleDAG(SD); 3209 return SD; 3210 } 3211