1 //===- RegAllocGreedy.cpp - greedy register allocator ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the RAGreedy function pass for register allocation in 10 // optimized builds. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RegAllocGreedy.h" 15 #include "AllocationOrder.h" 16 #include "InterferenceCache.h" 17 #include "RegAllocBase.h" 18 #include "SplitKit.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/ADT/BitVector.h" 21 #include "llvm/ADT/IndexedMap.h" 22 #include "llvm/ADT/SmallSet.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 27 #include "llvm/CodeGen/CalcSpillWeights.h" 28 #include "llvm/CodeGen/EdgeBundles.h" 29 #include "llvm/CodeGen/LiveDebugVariables.h" 30 #include "llvm/CodeGen/LiveInterval.h" 31 #include "llvm/CodeGen/LiveIntervalUnion.h" 32 #include "llvm/CodeGen/LiveIntervals.h" 33 #include "llvm/CodeGen/LiveRangeEdit.h" 34 #include "llvm/CodeGen/LiveRegMatrix.h" 35 #include "llvm/CodeGen/LiveStacks.h" 36 #include "llvm/CodeGen/MachineBasicBlock.h" 37 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 38 #include "llvm/CodeGen/MachineDominators.h" 39 #include "llvm/CodeGen/MachineFrameInfo.h" 40 #include "llvm/CodeGen/MachineFunction.h" 41 #include "llvm/CodeGen/MachineFunctionPass.h" 42 #include "llvm/CodeGen/MachineInstr.h" 43 #include "llvm/CodeGen/MachineLoopInfo.h" 44 #include "llvm/CodeGen/MachineOperand.h" 45 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" 46 #include "llvm/CodeGen/MachinePassManager.h" 47 #include "llvm/CodeGen/MachineRegisterInfo.h" 48 #include "llvm/CodeGen/RegAllocEvictionAdvisor.h" 49 #include "llvm/CodeGen/RegAllocGreedyPass.h" 50 #include "llvm/CodeGen/RegAllocPriorityAdvisor.h" 51 #include "llvm/CodeGen/RegAllocRegistry.h" 52 #include "llvm/CodeGen/RegisterClassInfo.h" 53 #include "llvm/CodeGen/SlotIndexes.h" 54 #include "llvm/CodeGen/SpillPlacement.h" 55 #include "llvm/CodeGen/Spiller.h" 56 #include "llvm/CodeGen/TargetInstrInfo.h" 57 #include "llvm/CodeGen/TargetRegisterInfo.h" 58 #include "llvm/CodeGen/TargetSubtargetInfo.h" 59 #include "llvm/CodeGen/VirtRegMap.h" 60 #include "llvm/IR/Analysis.h" 61 #include "llvm/IR/DebugInfoMetadata.h" 62 #include "llvm/IR/Function.h" 63 #include "llvm/IR/LLVMContext.h" 64 #include "llvm/Pass.h" 65 #include "llvm/Support/BlockFrequency.h" 66 #include "llvm/Support/BranchProbability.h" 67 #include "llvm/Support/CommandLine.h" 68 #include "llvm/Support/Debug.h" 69 #include "llvm/Support/MathExtras.h" 70 #include "llvm/Support/Timer.h" 71 #include "llvm/Support/raw_ostream.h" 72 #include <algorithm> 73 #include <cassert> 74 #include <cstdint> 75 #include <utility> 76 77 using namespace llvm; 78 79 #define DEBUG_TYPE "regalloc" 80 81 STATISTIC(NumGlobalSplits, "Number of split global live ranges"); 82 STATISTIC(NumLocalSplits, "Number of split local live ranges"); 83 STATISTIC(NumEvicted, "Number of interferences evicted"); 84 85 static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode( 86 "split-spill-mode", cl::Hidden, 87 cl::desc("Spill mode for splitting live ranges"), 88 cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), 89 clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), 90 clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")), 91 cl::init(SplitEditor::SM_Speed)); 92 93 static cl::opt<unsigned> 94 LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, 95 cl::desc("Last chance recoloring max depth"), 96 cl::init(5)); 97 98 static cl::opt<unsigned> LastChanceRecoloringMaxInterference( 99 "lcr-max-interf", cl::Hidden, 100 cl::desc("Last chance recoloring maximum number of considered" 101 " interference at a time"), 102 cl::init(8)); 103 104 static cl::opt<bool> ExhaustiveSearch( 105 "exhaustive-register-search", cl::NotHidden, 106 cl::desc("Exhaustive Search for registers bypassing the depth " 107 "and interference cutoffs of last chance recoloring"), 108 cl::Hidden); 109 110 // FIXME: Find a good default for this flag and remove the flag. 111 static cl::opt<unsigned> 112 CSRFirstTimeCost("regalloc-csr-first-time-cost", 113 cl::desc("Cost for first time use of callee-saved register."), 114 cl::init(0), cl::Hidden); 115 116 static cl::opt<unsigned long> GrowRegionComplexityBudget( 117 "grow-region-complexity-budget", 118 cl::desc("growRegion() does not scale with the number of BB edges, so " 119 "limit its budget and bail out once we reach the limit."), 120 cl::init(10000), cl::Hidden); 121 122 static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness( 123 "greedy-regclass-priority-trumps-globalness", 124 cl::desc("Change the greedy register allocator's live range priority " 125 "calculation to make the AllocationPriority of the register class " 126 "more important then whether the range is global"), 127 cl::Hidden); 128 129 static cl::opt<bool> GreedyReverseLocalAssignment( 130 "greedy-reverse-local-assignment", 131 cl::desc("Reverse allocation order of local live ranges, such that " 132 "shorter local live ranges will tend to be allocated first"), 133 cl::Hidden); 134 135 static cl::opt<unsigned> SplitThresholdForRegWithHint( 136 "split-threshold-for-reg-with-hint", 137 cl::desc("The threshold for splitting a virtual register with a hint, in " 138 "percentage"), 139 cl::init(75), cl::Hidden); 140 141 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", 142 createGreedyRegisterAllocator); 143 144 namespace { 145 class RAGreedyLegacy : public MachineFunctionPass { 146 RegAllocFilterFunc F; 147 148 public: 149 RAGreedyLegacy(const RegAllocFilterFunc F = nullptr); 150 151 static char ID; 152 /// Return the pass name. 153 StringRef getPassName() const override { return "Greedy Register Allocator"; } 154 155 /// RAGreedy analysis usage. 156 void getAnalysisUsage(AnalysisUsage &AU) const override; 157 /// Perform register allocation. 158 bool runOnMachineFunction(MachineFunction &mf) override; 159 160 MachineFunctionProperties getRequiredProperties() const override { 161 return MachineFunctionProperties().setNoPHIs(); 162 } 163 164 MachineFunctionProperties getClearedProperties() const override { 165 return MachineFunctionProperties().setIsSSA(); 166 } 167 }; 168 169 } // end anonymous namespace 170 171 RAGreedyLegacy::RAGreedyLegacy(const RegAllocFilterFunc F) 172 : MachineFunctionPass(ID), F(std::move(F)) { 173 initializeRAGreedyLegacyPass(*PassRegistry::getPassRegistry()); 174 } 175 176 struct RAGreedy::RequiredAnalyses { 177 VirtRegMap *VRM = nullptr; 178 LiveIntervals *LIS = nullptr; 179 LiveRegMatrix *LRM = nullptr; 180 SlotIndexes *Indexes = nullptr; 181 MachineBlockFrequencyInfo *MBFI = nullptr; 182 MachineDominatorTree *DomTree = nullptr; 183 MachineLoopInfo *Loops = nullptr; 184 MachineOptimizationRemarkEmitter *ORE = nullptr; 185 EdgeBundles *Bundles = nullptr; 186 SpillPlacement *SpillPlacer = nullptr; 187 LiveDebugVariables *DebugVars = nullptr; 188 189 // Used by InlineSpiller 190 LiveStacks *LSS; 191 // Proxies for eviction and priority advisors 192 RegAllocEvictionAdvisorProvider *EvictProvider; 193 RegAllocPriorityAdvisorProvider *PriorityProvider; 194 195 RequiredAnalyses() = delete; 196 RequiredAnalyses(Pass &P); 197 RequiredAnalyses(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); 198 }; 199 200 RAGreedy::RAGreedy(RequiredAnalyses &Analyses, const RegAllocFilterFunc F) 201 : RegAllocBase(F) { 202 VRM = Analyses.VRM; 203 LIS = Analyses.LIS; 204 Matrix = Analyses.LRM; 205 Indexes = Analyses.Indexes; 206 MBFI = Analyses.MBFI; 207 DomTree = Analyses.DomTree; 208 Loops = Analyses.Loops; 209 ORE = Analyses.ORE; 210 Bundles = Analyses.Bundles; 211 SpillPlacer = Analyses.SpillPlacer; 212 DebugVars = Analyses.DebugVars; 213 LSS = Analyses.LSS; 214 EvictProvider = Analyses.EvictProvider; 215 PriorityProvider = Analyses.PriorityProvider; 216 } 217 218 void RAGreedyPass::printPipeline( 219 raw_ostream &OS, 220 function_ref<StringRef(StringRef)> MapClassName2PassName) const { 221 StringRef FilterName = Opts.FilterName.empty() ? "all" : Opts.FilterName; 222 OS << "greedy<" << FilterName << '>'; 223 } 224 225 RAGreedy::RequiredAnalyses::RequiredAnalyses( 226 MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { 227 LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF); 228 LRM = &MFAM.getResult<LiveRegMatrixAnalysis>(MF); 229 LSS = &MFAM.getResult<LiveStacksAnalysis>(MF); 230 Indexes = &MFAM.getResult<SlotIndexesAnalysis>(MF); 231 MBFI = &MFAM.getResult<MachineBlockFrequencyAnalysis>(MF); 232 DomTree = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF); 233 ORE = &MFAM.getResult<MachineOptimizationRemarkEmitterAnalysis>(MF); 234 Loops = &MFAM.getResult<MachineLoopAnalysis>(MF); 235 Bundles = &MFAM.getResult<EdgeBundlesAnalysis>(MF); 236 SpillPlacer = &MFAM.getResult<SpillPlacementAnalysis>(MF); 237 DebugVars = &MFAM.getResult<LiveDebugVariablesAnalysis>(MF); 238 EvictProvider = MFAM.getResult<RegAllocEvictionAdvisorAnalysis>(MF).Provider; 239 PriorityProvider = 240 MFAM.getResult<RegAllocPriorityAdvisorAnalysis>(MF).Provider; 241 VRM = &MFAM.getResult<VirtRegMapAnalysis>(MF); 242 } 243 244 PreservedAnalyses RAGreedyPass::run(MachineFunction &MF, 245 MachineFunctionAnalysisManager &MFAM) { 246 MFPropsModifier _(*this, MF); 247 248 RAGreedy::RequiredAnalyses Analyses(MF, MFAM); 249 RAGreedy Impl(Analyses, Opts.Filter); 250 251 bool Changed = Impl.run(MF); 252 if (!Changed) 253 return PreservedAnalyses::all(); 254 auto PA = getMachineFunctionPassPreservedAnalyses(); 255 PA.preserveSet<CFGAnalyses>(); 256 PA.preserve<MachineBlockFrequencyAnalysis>(); 257 PA.preserve<LiveIntervalsAnalysis>(); 258 PA.preserve<SlotIndexesAnalysis>(); 259 PA.preserve<LiveDebugVariablesAnalysis>(); 260 PA.preserve<LiveStacksAnalysis>(); 261 PA.preserve<VirtRegMapAnalysis>(); 262 PA.preserve<LiveRegMatrixAnalysis>(); 263 return PA; 264 } 265 266 RAGreedy::RequiredAnalyses::RequiredAnalyses(Pass &P) { 267 VRM = &P.getAnalysis<VirtRegMapWrapperLegacy>().getVRM(); 268 LIS = &P.getAnalysis<LiveIntervalsWrapperPass>().getLIS(); 269 LSS = &P.getAnalysis<LiveStacksWrapperLegacy>().getLS(); 270 LRM = &P.getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM(); 271 Indexes = &P.getAnalysis<SlotIndexesWrapperPass>().getSI(); 272 MBFI = &P.getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); 273 DomTree = &P.getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); 274 ORE = &P.getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); 275 Loops = &P.getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 276 Bundles = &P.getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles(); 277 SpillPlacer = &P.getAnalysis<SpillPlacementWrapperLegacy>().getResult(); 278 DebugVars = &P.getAnalysis<LiveDebugVariablesWrapperLegacy>().getLDV(); 279 EvictProvider = 280 &P.getAnalysis<RegAllocEvictionAdvisorAnalysisLegacy>().getProvider(); 281 PriorityProvider = 282 &P.getAnalysis<RegAllocPriorityAdvisorAnalysisLegacy>().getProvider(); 283 } 284 285 bool RAGreedyLegacy::runOnMachineFunction(MachineFunction &MF) { 286 RAGreedy::RequiredAnalyses Analyses(*this); 287 RAGreedy Impl(Analyses, F); 288 return Impl.run(MF); 289 } 290 291 char RAGreedyLegacy::ID = 0; 292 char &llvm::RAGreedyLegacyID = RAGreedyLegacy::ID; 293 294 INITIALIZE_PASS_BEGIN(RAGreedyLegacy, "greedy", "Greedy Register Allocator", 295 false, false) 296 INITIALIZE_PASS_DEPENDENCY(LiveDebugVariablesWrapperLegacy) 297 INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) 298 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) 299 INITIALIZE_PASS_DEPENDENCY(RegisterCoalescerLegacy) 300 INITIALIZE_PASS_DEPENDENCY(MachineSchedulerLegacy) 301 INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy) 302 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 303 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) 304 INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) 305 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy) 306 INITIALIZE_PASS_DEPENDENCY(EdgeBundlesWrapperLegacy) 307 INITIALIZE_PASS_DEPENDENCY(SpillPlacementWrapperLegacy) 308 INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) 309 INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysisLegacy) 310 INITIALIZE_PASS_DEPENDENCY(RegAllocPriorityAdvisorAnalysisLegacy) 311 INITIALIZE_PASS_END(RAGreedyLegacy, "greedy", "Greedy Register Allocator", 312 false, false) 313 314 #ifndef NDEBUG 315 const char *const RAGreedy::StageName[] = { 316 "RS_New", 317 "RS_Assign", 318 "RS_Split", 319 "RS_Split2", 320 "RS_Spill", 321 "RS_Done" 322 }; 323 #endif 324 325 // Hysteresis to use when comparing floats. 326 // This helps stabilize decisions based on float comparisons. 327 const float Hysteresis = (2007 / 2048.0f); // 0.97998046875 328 329 FunctionPass* llvm::createGreedyRegisterAllocator() { 330 return new RAGreedyLegacy(); 331 } 332 333 FunctionPass *llvm::createGreedyRegisterAllocator(RegAllocFilterFunc Ftor) { 334 return new RAGreedyLegacy(Ftor); 335 } 336 337 void RAGreedyLegacy::getAnalysisUsage(AnalysisUsage &AU) const { 338 AU.setPreservesCFG(); 339 AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); 340 AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>(); 341 AU.addRequired<LiveIntervalsWrapperPass>(); 342 AU.addPreserved<LiveIntervalsWrapperPass>(); 343 AU.addRequired<SlotIndexesWrapperPass>(); 344 AU.addPreserved<SlotIndexesWrapperPass>(); 345 AU.addRequired<LiveDebugVariablesWrapperLegacy>(); 346 AU.addPreserved<LiveDebugVariablesWrapperLegacy>(); 347 AU.addRequired<LiveStacksWrapperLegacy>(); 348 AU.addPreserved<LiveStacksWrapperLegacy>(); 349 AU.addRequired<MachineDominatorTreeWrapperPass>(); 350 AU.addPreserved<MachineDominatorTreeWrapperPass>(); 351 AU.addRequired<MachineLoopInfoWrapperPass>(); 352 AU.addPreserved<MachineLoopInfoWrapperPass>(); 353 AU.addRequired<VirtRegMapWrapperLegacy>(); 354 AU.addPreserved<VirtRegMapWrapperLegacy>(); 355 AU.addRequired<LiveRegMatrixWrapperLegacy>(); 356 AU.addPreserved<LiveRegMatrixWrapperLegacy>(); 357 AU.addRequired<EdgeBundlesWrapperLegacy>(); 358 AU.addRequired<SpillPlacementWrapperLegacy>(); 359 AU.addRequired<MachineOptimizationRemarkEmitterPass>(); 360 AU.addRequired<RegAllocEvictionAdvisorAnalysisLegacy>(); 361 AU.addRequired<RegAllocPriorityAdvisorAnalysisLegacy>(); 362 MachineFunctionPass::getAnalysisUsage(AU); 363 } 364 365 //===----------------------------------------------------------------------===// 366 // LiveRangeEdit delegate methods 367 //===----------------------------------------------------------------------===// 368 369 bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) { 370 LiveInterval &LI = LIS->getInterval(VirtReg); 371 if (VRM->hasPhys(VirtReg)) { 372 Matrix->unassign(LI); 373 aboutToRemoveInterval(LI); 374 return true; 375 } 376 // Unassigned virtreg is probably in the priority queue. 377 // RegAllocBase will erase it after dequeueing. 378 // Nonetheless, clear the live-range so that the debug 379 // dump will show the right state for that VirtReg. 380 LI.clear(); 381 return false; 382 } 383 384 void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) { 385 if (!VRM->hasPhys(VirtReg)) 386 return; 387 388 // Register is assigned, put it back on the queue for reassignment. 389 LiveInterval &LI = LIS->getInterval(VirtReg); 390 Matrix->unassign(LI); 391 RegAllocBase::enqueue(&LI); 392 } 393 394 void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) { 395 ExtraInfo->LRE_DidCloneVirtReg(New, Old); 396 } 397 398 void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) { 399 // Cloning a register we haven't even heard about yet? Just ignore it. 400 if (!Info.inBounds(Old)) 401 return; 402 403 // LRE may clone a virtual register because dead code elimination causes it to 404 // be split into connected components. The new components are much smaller 405 // than the original, so they should get a new chance at being assigned. 406 // same stage as the parent. 407 Info[Old].Stage = RS_Assign; 408 Info.grow(New.id()); 409 Info[New] = Info[Old]; 410 } 411 412 void RAGreedy::releaseMemory() { 413 SpillerInstance.reset(); 414 GlobalCand.clear(); 415 } 416 417 void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); } 418 419 void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) { 420 // Prioritize live ranges by size, assigning larger ranges first. 421 // The queue holds (size, reg) pairs. 422 const Register Reg = LI->reg(); 423 assert(Reg.isVirtual() && "Can only enqueue virtual registers"); 424 425 auto Stage = ExtraInfo->getOrInitStage(Reg); 426 if (Stage == RS_New) { 427 Stage = RS_Assign; 428 ExtraInfo->setStage(Reg, Stage); 429 } 430 431 unsigned Ret = PriorityAdvisor->getPriority(*LI); 432 433 // The virtual register number is a tie breaker for same-sized ranges. 434 // Give lower vreg numbers higher priority to assign them first. 435 CurQueue.push(std::make_pair(Ret, ~Reg.id())); 436 } 437 438 unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const { 439 const unsigned Size = LI.getSize(); 440 const Register Reg = LI.reg(); 441 unsigned Prio; 442 LiveRangeStage Stage = RA.getExtraInfo().getStage(LI); 443 444 if (Stage == RS_Split) { 445 // Unsplit ranges that couldn't be allocated immediately are deferred until 446 // everything else has been allocated. 447 Prio = Size; 448 } else { 449 // Giant live ranges fall back to the global assignment heuristic, which 450 // prevents excessive spilling in pathological cases. 451 const TargetRegisterClass &RC = *MRI->getRegClass(Reg); 452 bool ForceGlobal = RC.GlobalPriority || 453 (!ReverseLocalAssignment && 454 (Size / SlotIndex::InstrDist) > 455 (2 * RegClassInfo.getNumAllocatableRegs(&RC))); 456 unsigned GlobalBit = 0; 457 458 if (Stage == RS_Assign && !ForceGlobal && !LI.empty() && 459 LIS->intervalIsInOneMBB(LI)) { 460 // Allocate original local ranges in linear instruction order. Since they 461 // are singly defined, this produces optimal coloring in the absence of 462 // global interference and other constraints. 463 if (!ReverseLocalAssignment) 464 Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex()); 465 else { 466 // Allocating bottom up may allow many short LRGs to be assigned first 467 // to one of the cheap registers. This could be much faster for very 468 // large blocks on targets with many physical registers. 469 Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex()); 470 } 471 } else { 472 // Allocate global and split ranges in long->short order. Long ranges that 473 // don't fit should be spilled (or split) ASAP so they don't create 474 // interference. Mark a bit to prioritize global above local ranges. 475 Prio = Size; 476 GlobalBit = 1; 477 } 478 479 // Priority bit layout: 480 // 31 RS_Assign priority 481 // 30 Preference priority 482 // if (RegClassPriorityTrumpsGlobalness) 483 // 29-25 AllocPriority 484 // 24 GlobalBit 485 // else 486 // 29 Global bit 487 // 28-24 AllocPriority 488 // 0-23 Size/Instr distance 489 490 // Clamp the size to fit with the priority masking scheme 491 Prio = std::min(Prio, (unsigned)maxUIntN(24)); 492 assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow"); 493 494 if (RegClassPriorityTrumpsGlobalness) 495 Prio |= RC.AllocationPriority << 25 | GlobalBit << 24; 496 else 497 Prio |= GlobalBit << 29 | RC.AllocationPriority << 24; 498 499 // Mark a higher bit to prioritize global and local above RS_Split. 500 Prio |= (1u << 31); 501 502 // Boost ranges that have a physical register hint. 503 if (VRM->hasKnownPreference(Reg)) 504 Prio |= (1u << 30); 505 } 506 507 return Prio; 508 } 509 510 unsigned DummyPriorityAdvisor::getPriority(const LiveInterval &LI) const { 511 // Prioritize by virtual register number, lowest first. 512 Register Reg = LI.reg(); 513 return ~Reg.virtRegIndex(); 514 } 515 516 const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } 517 518 const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { 519 if (CurQueue.empty()) 520 return nullptr; 521 LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); 522 CurQueue.pop(); 523 return LI; 524 } 525 526 //===----------------------------------------------------------------------===// 527 // Direct Assignment 528 //===----------------------------------------------------------------------===// 529 530 /// tryAssign - Try to assign VirtReg to an available register. 531 MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg, 532 AllocationOrder &Order, 533 SmallVectorImpl<Register> &NewVRegs, 534 const SmallVirtRegSet &FixedRegisters) { 535 MCRegister PhysReg; 536 for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) { 537 assert(*I); 538 if (!Matrix->checkInterference(VirtReg, *I)) { 539 if (I.isHint()) 540 return *I; 541 else 542 PhysReg = *I; 543 } 544 } 545 if (!PhysReg.isValid()) 546 return PhysReg; 547 548 // PhysReg is available, but there may be a better choice. 549 550 // If we missed a simple hint, try to cheaply evict interference from the 551 // preferred register. 552 if (Register Hint = MRI->getSimpleHint(VirtReg.reg())) 553 if (Order.isHint(Hint)) { 554 MCRegister PhysHint = Hint.asMCReg(); 555 LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n'); 556 557 if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint, 558 FixedRegisters)) { 559 evictInterference(VirtReg, PhysHint, NewVRegs); 560 return PhysHint; 561 } 562 563 // We can also split the virtual register in cold blocks. 564 if (trySplitAroundHintReg(PhysHint, VirtReg, NewVRegs, Order)) 565 return MCRegister(); 566 567 // Record the missed hint, we may be able to recover 568 // at the end if the surrounding allocation changed. 569 SetOfBrokenHints.insert(&VirtReg); 570 } 571 572 // Try to evict interference from a cheaper alternative. 573 uint8_t Cost = RegCosts[PhysReg.id()]; 574 575 // Most registers have 0 additional cost. 576 if (!Cost) 577 return PhysReg; 578 579 LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " 580 << (unsigned)Cost << '\n'); 581 MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters); 582 return CheapReg ? CheapReg : PhysReg; 583 } 584 585 //===----------------------------------------------------------------------===// 586 // Interference eviction 587 //===----------------------------------------------------------------------===// 588 589 bool RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg, 590 MCRegister FromReg) const { 591 auto HasRegUnitInterference = [&](MCRegUnit Unit) { 592 // Instantiate a "subquery", not to be confused with the Queries array. 593 LiveIntervalUnion::Query SubQ(VirtReg, Matrix->getLiveUnions()[Unit]); 594 return SubQ.checkInterference(); 595 }; 596 597 for (MCRegister Reg : 598 AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix)) { 599 if (Reg == FromReg) 600 continue; 601 // If no units have interference, reassignment is possible. 602 if (none_of(TRI->regunits(Reg), HasRegUnitInterference)) { 603 LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from " 604 << printReg(FromReg, TRI) << " to " 605 << printReg(Reg, TRI) << '\n'); 606 return true; 607 } 608 } 609 return false; 610 } 611 612 /// evictInterference - Evict any interferring registers that prevent VirtReg 613 /// from being assigned to Physreg. This assumes that canEvictInterference 614 /// returned true. 615 void RAGreedy::evictInterference(const LiveInterval &VirtReg, 616 MCRegister PhysReg, 617 SmallVectorImpl<Register> &NewVRegs) { 618 // Make sure that VirtReg has a cascade number, and assign that cascade 619 // number to every evicted register. These live ranges than then only be 620 // evicted by a newer cascade, preventing infinite loops. 621 unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg()); 622 623 LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI) 624 << " interference: Cascade " << Cascade << '\n'); 625 626 // Collect all interfering virtregs first. 627 SmallVector<const LiveInterval *, 8> Intfs; 628 for (MCRegUnit Unit : TRI->regunits(PhysReg)) { 629 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit); 630 // We usually have the interfering VRegs cached so collectInterferingVRegs() 631 // should be fast, we may need to recalculate if when different physregs 632 // overlap the same register unit so we had different SubRanges queried 633 // against it. 634 ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs(); 635 Intfs.append(IVR.begin(), IVR.end()); 636 } 637 638 // Evict them second. This will invalidate the queries. 639 for (const LiveInterval *Intf : Intfs) { 640 // The same VirtReg may be present in multiple RegUnits. Skip duplicates. 641 if (!VRM->hasPhys(Intf->reg())) 642 continue; 643 644 Matrix->unassign(*Intf); 645 assert((ExtraInfo->getCascade(Intf->reg()) < Cascade || 646 VirtReg.isSpillable() < Intf->isSpillable()) && 647 "Cannot decrease cascade number, illegal eviction"); 648 ExtraInfo->setCascade(Intf->reg(), Cascade); 649 ++NumEvicted; 650 NewVRegs.push_back(Intf->reg()); 651 } 652 } 653 654 /// Returns true if the given \p PhysReg is a callee saved register and has not 655 /// been used for allocation yet. 656 bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const { 657 MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg); 658 if (!CSR) 659 return false; 660 661 return !Matrix->isPhysRegUsed(PhysReg); 662 } 663 664 std::optional<unsigned> 665 RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg, 666 const AllocationOrder &Order, 667 unsigned CostPerUseLimit) const { 668 unsigned OrderLimit = Order.getOrder().size(); 669 670 if (CostPerUseLimit < uint8_t(~0u)) { 671 // Check of any registers in RC are below CostPerUseLimit. 672 const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg()); 673 uint8_t MinCost = RegClassInfo.getMinCost(RC); 674 if (MinCost >= CostPerUseLimit) { 675 LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " 676 << MinCost << ", no cheaper registers to be found.\n"); 677 return std::nullopt; 678 } 679 680 // It is normal for register classes to have a long tail of registers with 681 // the same cost. We don't need to look at them if they're too expensive. 682 if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) { 683 OrderLimit = RegClassInfo.getLastCostChange(RC); 684 LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit 685 << " regs.\n"); 686 } 687 } 688 return OrderLimit; 689 } 690 691 bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit, 692 MCRegister PhysReg) const { 693 if (RegCosts[PhysReg.id()] >= CostPerUseLimit) 694 return false; 695 // The first use of a callee-saved register in a function has cost 1. 696 // Don't start using a CSR when the CostPerUseLimit is low. 697 if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) { 698 LLVM_DEBUG( 699 dbgs() << printReg(PhysReg, TRI) << " would clobber CSR " 700 << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) 701 << '\n'); 702 return false; 703 } 704 return true; 705 } 706 707 /// tryEvict - Try to evict all interferences for a physreg. 708 /// @param VirtReg Currently unassigned virtual register. 709 /// @param Order Physregs to try. 710 /// @return Physreg to assign VirtReg, or 0. 711 MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg, 712 AllocationOrder &Order, 713 SmallVectorImpl<Register> &NewVRegs, 714 uint8_t CostPerUseLimit, 715 const SmallVirtRegSet &FixedRegisters) { 716 NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription, 717 TimePassesIsEnabled); 718 719 MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate( 720 VirtReg, Order, CostPerUseLimit, FixedRegisters); 721 if (BestPhys.isValid()) 722 evictInterference(VirtReg, BestPhys, NewVRegs); 723 return BestPhys; 724 } 725 726 //===----------------------------------------------------------------------===// 727 // Region Splitting 728 //===----------------------------------------------------------------------===// 729 730 /// addSplitConstraints - Fill out the SplitConstraints vector based on the 731 /// interference pattern in Physreg and its aliases. Add the constraints to 732 /// SpillPlacement and return the static cost of this split in Cost, assuming 733 /// that all preferences in SplitConstraints are met. 734 /// Return false if there are no bundles with positive bias. 735 bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, 736 BlockFrequency &Cost) { 737 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 738 739 // Reset interference dependent info. 740 SplitConstraints.resize(UseBlocks.size()); 741 BlockFrequency StaticCost = BlockFrequency(0); 742 for (unsigned I = 0; I != UseBlocks.size(); ++I) { 743 const SplitAnalysis::BlockInfo &BI = UseBlocks[I]; 744 SpillPlacement::BlockConstraint &BC = SplitConstraints[I]; 745 746 BC.Number = BI.MBB->getNumber(); 747 Intf.moveToBlock(BC.Number); 748 BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; 749 BC.Exit = (BI.LiveOut && 750 !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef()) 751 ? SpillPlacement::PrefReg 752 : SpillPlacement::DontCare; 753 BC.ChangesValue = BI.FirstDef.isValid(); 754 755 if (!Intf.hasInterference()) 756 continue; 757 758 // Number of spill code instructions to insert. 759 unsigned Ins = 0; 760 761 // Interference for the live-in value. 762 if (BI.LiveIn) { 763 if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) { 764 BC.Entry = SpillPlacement::MustSpill; 765 ++Ins; 766 } else if (Intf.first() < BI.FirstInstr) { 767 BC.Entry = SpillPlacement::PrefSpill; 768 ++Ins; 769 } else if (Intf.first() < BI.LastInstr) { 770 ++Ins; 771 } 772 773 // Abort if the spill cannot be inserted at the MBB' start 774 if (((BC.Entry == SpillPlacement::MustSpill) || 775 (BC.Entry == SpillPlacement::PrefSpill)) && 776 SlotIndex::isEarlierInstr(BI.FirstInstr, 777 SA->getFirstSplitPoint(BC.Number))) 778 return false; 779 } 780 781 // Interference for the live-out value. 782 if (BI.LiveOut) { 783 if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) { 784 BC.Exit = SpillPlacement::MustSpill; 785 ++Ins; 786 } else if (Intf.last() > BI.LastInstr) { 787 BC.Exit = SpillPlacement::PrefSpill; 788 ++Ins; 789 } else if (Intf.last() > BI.FirstInstr) { 790 ++Ins; 791 } 792 } 793 794 // Accumulate the total frequency of inserted spill code. 795 while (Ins--) 796 StaticCost += SpillPlacer->getBlockFrequency(BC.Number); 797 } 798 Cost = StaticCost; 799 800 // Add constraints for use-blocks. Note that these are the only constraints 801 // that may add a positive bias, it is downhill from here. 802 SpillPlacer->addConstraints(SplitConstraints); 803 return SpillPlacer->scanActiveBundles(); 804 } 805 806 /// addThroughConstraints - Add constraints and links to SpillPlacer from the 807 /// live-through blocks in Blocks. 808 bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, 809 ArrayRef<unsigned> Blocks) { 810 const unsigned GroupSize = 8; 811 SpillPlacement::BlockConstraint BCS[GroupSize]; 812 unsigned TBS[GroupSize]; 813 unsigned B = 0, T = 0; 814 815 for (unsigned Number : Blocks) { 816 Intf.moveToBlock(Number); 817 818 if (!Intf.hasInterference()) { 819 assert(T < GroupSize && "Array overflow"); 820 TBS[T] = Number; 821 if (++T == GroupSize) { 822 SpillPlacer->addLinks(ArrayRef(TBS, T)); 823 T = 0; 824 } 825 continue; 826 } 827 828 assert(B < GroupSize && "Array overflow"); 829 BCS[B].Number = Number; 830 831 // Abort if the spill cannot be inserted at the MBB' start 832 MachineBasicBlock *MBB = MF->getBlockNumbered(Number); 833 auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr(); 834 if (FirstNonDebugInstr != MBB->end() && 835 SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr), 836 SA->getFirstSplitPoint(Number))) 837 return false; 838 // Interference for the live-in value. 839 if (Intf.first() <= Indexes->getMBBStartIdx(Number)) 840 BCS[B].Entry = SpillPlacement::MustSpill; 841 else 842 BCS[B].Entry = SpillPlacement::PrefSpill; 843 844 // Interference for the live-out value. 845 if (Intf.last() >= SA->getLastSplitPoint(Number)) 846 BCS[B].Exit = SpillPlacement::MustSpill; 847 else 848 BCS[B].Exit = SpillPlacement::PrefSpill; 849 850 if (++B == GroupSize) { 851 SpillPlacer->addConstraints(ArrayRef(BCS, B)); 852 B = 0; 853 } 854 } 855 856 SpillPlacer->addConstraints(ArrayRef(BCS, B)); 857 SpillPlacer->addLinks(ArrayRef(TBS, T)); 858 return true; 859 } 860 861 bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) { 862 // Keep track of through blocks that have not been added to SpillPlacer. 863 BitVector Todo = SA->getThroughBlocks(); 864 SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; 865 unsigned AddedTo = 0; 866 #ifndef NDEBUG 867 unsigned Visited = 0; 868 #endif 869 870 unsigned long Budget = GrowRegionComplexityBudget; 871 while (true) { 872 ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); 873 // Find new through blocks in the periphery of PrefRegBundles. 874 for (unsigned Bundle : NewBundles) { 875 // Look at all blocks connected to Bundle in the full graph. 876 ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); 877 // Limit compilation time by bailing out after we use all our budget. 878 if (Blocks.size() >= Budget) 879 return false; 880 Budget -= Blocks.size(); 881 for (unsigned Block : Blocks) { 882 if (!Todo.test(Block)) 883 continue; 884 Todo.reset(Block); 885 // This is a new through block. Add it to SpillPlacer later. 886 ActiveBlocks.push_back(Block); 887 #ifndef NDEBUG 888 ++Visited; 889 #endif 890 } 891 } 892 // Any new blocks to add? 893 if (ActiveBlocks.size() == AddedTo) 894 break; 895 896 // Compute through constraints from the interference, or assume that all 897 // through blocks prefer spilling when forming compact regions. 898 auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo); 899 if (Cand.PhysReg) { 900 if (!addThroughConstraints(Cand.Intf, NewBlocks)) 901 return false; 902 } else { 903 // Providing that the variable being spilled does not look like a loop 904 // induction variable, which is expensive to spill around and better 905 // pushed into a condition inside the loop if possible, provide a strong 906 // negative bias on through blocks to prevent unwanted liveness on loop 907 // backedges. 908 bool PrefSpill = true; 909 if (SA->looksLikeLoopIV() && NewBlocks.size() >= 2) { 910 // Check that the current bundle is adding a Header + start+end of 911 // loop-internal blocks. If the block is indeed a header, don't make 912 // the NewBlocks as PrefSpill to allow the variable to be live in 913 // Header<->Latch. 914 MachineLoop *L = Loops->getLoopFor(MF->getBlockNumbered(NewBlocks[0])); 915 if (L && L->getHeader()->getNumber() == (int)NewBlocks[0] && 916 all_of(NewBlocks.drop_front(), [&](unsigned Block) { 917 return L == Loops->getLoopFor(MF->getBlockNumbered(Block)); 918 })) 919 PrefSpill = false; 920 } 921 if (PrefSpill) 922 SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true); 923 } 924 AddedTo = ActiveBlocks.size(); 925 926 // Perhaps iterating can enable more bundles? 927 SpillPlacer->iterate(); 928 } 929 LLVM_DEBUG(dbgs() << ", v=" << Visited); 930 return true; 931 } 932 933 /// calcCompactRegion - Compute the set of edge bundles that should be live 934 /// when splitting the current live range into compact regions. Compact 935 /// regions can be computed without looking at interference. They are the 936 /// regions formed by removing all the live-through blocks from the live range. 937 /// 938 /// Returns false if the current live range is already compact, or if the 939 /// compact regions would form single block regions anyway. 940 bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { 941 // Without any through blocks, the live range is already compact. 942 if (!SA->getNumThroughBlocks()) 943 return false; 944 945 // Compact regions don't correspond to any physreg. 946 Cand.reset(IntfCache, MCRegister::NoRegister); 947 948 LLVM_DEBUG(dbgs() << "Compact region bundles"); 949 950 // Use the spill placer to determine the live bundles. GrowRegion pretends 951 // that all the through blocks have interference when PhysReg is unset. 952 SpillPlacer->prepare(Cand.LiveBundles); 953 954 // The static split cost will be zero since Cand.Intf reports no interference. 955 BlockFrequency Cost; 956 if (!addSplitConstraints(Cand.Intf, Cost)) { 957 LLVM_DEBUG(dbgs() << ", none.\n"); 958 return false; 959 } 960 961 if (!growRegion(Cand)) { 962 LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n"); 963 return false; 964 } 965 966 SpillPlacer->finish(); 967 968 if (!Cand.LiveBundles.any()) { 969 LLVM_DEBUG(dbgs() << ", none.\n"); 970 return false; 971 } 972 973 LLVM_DEBUG({ 974 for (int I : Cand.LiveBundles.set_bits()) 975 dbgs() << " EB#" << I; 976 dbgs() << ".\n"; 977 }); 978 return true; 979 } 980 981 /// calcSpillCost - Compute how expensive it would be to split the live range in 982 /// SA around all use blocks instead of forming bundle regions. 983 BlockFrequency RAGreedy::calcSpillCost() { 984 BlockFrequency Cost = BlockFrequency(0); 985 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 986 for (const SplitAnalysis::BlockInfo &BI : UseBlocks) { 987 unsigned Number = BI.MBB->getNumber(); 988 // We normally only need one spill instruction - a load or a store. 989 Cost += SpillPlacer->getBlockFrequency(Number); 990 991 // Unless the value is redefined in the block. 992 if (BI.LiveIn && BI.LiveOut && BI.FirstDef) 993 Cost += SpillPlacer->getBlockFrequency(Number); 994 } 995 return Cost; 996 } 997 998 /// calcGlobalSplitCost - Return the global split cost of following the split 999 /// pattern in LiveBundles. This cost should be added to the local cost of the 1000 /// interference pattern in SplitConstraints. 1001 /// 1002 BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, 1003 const AllocationOrder &Order) { 1004 BlockFrequency GlobalCost = BlockFrequency(0); 1005 const BitVector &LiveBundles = Cand.LiveBundles; 1006 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1007 for (unsigned I = 0; I != UseBlocks.size(); ++I) { 1008 const SplitAnalysis::BlockInfo &BI = UseBlocks[I]; 1009 SpillPlacement::BlockConstraint &BC = SplitConstraints[I]; 1010 bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)]; 1011 bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)]; 1012 unsigned Ins = 0; 1013 1014 Cand.Intf.moveToBlock(BC.Number); 1015 1016 if (BI.LiveIn) 1017 Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); 1018 if (BI.LiveOut) 1019 Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); 1020 while (Ins--) 1021 GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); 1022 } 1023 1024 for (unsigned Number : Cand.ActiveBlocks) { 1025 bool RegIn = LiveBundles[Bundles->getBundle(Number, false)]; 1026 bool RegOut = LiveBundles[Bundles->getBundle(Number, true)]; 1027 if (!RegIn && !RegOut) 1028 continue; 1029 if (RegIn && RegOut) { 1030 // We need double spill code if this block has interference. 1031 Cand.Intf.moveToBlock(Number); 1032 if (Cand.Intf.hasInterference()) { 1033 GlobalCost += SpillPlacer->getBlockFrequency(Number); 1034 GlobalCost += SpillPlacer->getBlockFrequency(Number); 1035 } 1036 continue; 1037 } 1038 // live-in / stack-out or stack-in live-out. 1039 GlobalCost += SpillPlacer->getBlockFrequency(Number); 1040 } 1041 return GlobalCost; 1042 } 1043 1044 /// splitAroundRegion - Split the current live range around the regions 1045 /// determined by BundleCand and GlobalCand. 1046 /// 1047 /// Before calling this function, GlobalCand and BundleCand must be initialized 1048 /// so each bundle is assigned to a valid candidate, or NoCand for the 1049 /// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor 1050 /// objects must be initialized for the current live range, and intervals 1051 /// created for the used candidates. 1052 /// 1053 /// @param LREdit The LiveRangeEdit object handling the current split. 1054 /// @param UsedCands List of used GlobalCand entries. Every BundleCand value 1055 /// must appear in this list. 1056 void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, 1057 ArrayRef<unsigned> UsedCands) { 1058 // These are the intervals created for new global ranges. We may create more 1059 // intervals for local ranges. 1060 const unsigned NumGlobalIntvs = LREdit.size(); 1061 LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs 1062 << " globals.\n"); 1063 assert(NumGlobalIntvs && "No global intervals configured"); 1064 1065 // Isolate even single instructions when dealing with a proper sub-class. 1066 // That guarantees register class inflation for the stack interval because it 1067 // is all copies. 1068 Register Reg = SA->getParent().reg(); 1069 bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); 1070 1071 // First handle all the blocks with uses. 1072 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1073 for (const SplitAnalysis::BlockInfo &BI : UseBlocks) { 1074 unsigned Number = BI.MBB->getNumber(); 1075 unsigned IntvIn = 0, IntvOut = 0; 1076 SlotIndex IntfIn, IntfOut; 1077 if (BI.LiveIn) { 1078 unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)]; 1079 if (CandIn != NoCand) { 1080 GlobalSplitCandidate &Cand = GlobalCand[CandIn]; 1081 IntvIn = Cand.IntvIdx; 1082 Cand.Intf.moveToBlock(Number); 1083 IntfIn = Cand.Intf.first(); 1084 } 1085 } 1086 if (BI.LiveOut) { 1087 unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)]; 1088 if (CandOut != NoCand) { 1089 GlobalSplitCandidate &Cand = GlobalCand[CandOut]; 1090 IntvOut = Cand.IntvIdx; 1091 Cand.Intf.moveToBlock(Number); 1092 IntfOut = Cand.Intf.last(); 1093 } 1094 } 1095 1096 // Create separate intervals for isolated blocks with multiple uses. 1097 if (!IntvIn && !IntvOut) { 1098 LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n"); 1099 if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) 1100 SE->splitSingleBlock(BI); 1101 continue; 1102 } 1103 1104 if (IntvIn && IntvOut) 1105 SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); 1106 else if (IntvIn) 1107 SE->splitRegInBlock(BI, IntvIn, IntfIn); 1108 else 1109 SE->splitRegOutBlock(BI, IntvOut, IntfOut); 1110 } 1111 1112 // Handle live-through blocks. The relevant live-through blocks are stored in 1113 // the ActiveBlocks list with each candidate. We need to filter out 1114 // duplicates. 1115 BitVector Todo = SA->getThroughBlocks(); 1116 for (unsigned UsedCand : UsedCands) { 1117 ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks; 1118 for (unsigned Number : Blocks) { 1119 if (!Todo.test(Number)) 1120 continue; 1121 Todo.reset(Number); 1122 1123 unsigned IntvIn = 0, IntvOut = 0; 1124 SlotIndex IntfIn, IntfOut; 1125 1126 unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)]; 1127 if (CandIn != NoCand) { 1128 GlobalSplitCandidate &Cand = GlobalCand[CandIn]; 1129 IntvIn = Cand.IntvIdx; 1130 Cand.Intf.moveToBlock(Number); 1131 IntfIn = Cand.Intf.first(); 1132 } 1133 1134 unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)]; 1135 if (CandOut != NoCand) { 1136 GlobalSplitCandidate &Cand = GlobalCand[CandOut]; 1137 IntvOut = Cand.IntvIdx; 1138 Cand.Intf.moveToBlock(Number); 1139 IntfOut = Cand.Intf.last(); 1140 } 1141 if (!IntvIn && !IntvOut) 1142 continue; 1143 SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); 1144 } 1145 } 1146 1147 ++NumGlobalSplits; 1148 1149 SmallVector<unsigned, 8> IntvMap; 1150 SE->finish(&IntvMap); 1151 DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); 1152 1153 unsigned OrigBlocks = SA->getNumLiveBlocks(); 1154 1155 // Sort out the new intervals created by splitting. We get four kinds: 1156 // - Remainder intervals should not be split again. 1157 // - Candidate intervals can be assigned to Cand.PhysReg. 1158 // - Block-local splits are candidates for local splitting. 1159 // - DCE leftovers should go back on the queue. 1160 for (unsigned I = 0, E = LREdit.size(); I != E; ++I) { 1161 const LiveInterval &Reg = LIS->getInterval(LREdit.get(I)); 1162 1163 // Ignore old intervals from DCE. 1164 if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New) 1165 continue; 1166 1167 // Remainder interval. Don't try splitting again, spill if it doesn't 1168 // allocate. 1169 if (IntvMap[I] == 0) { 1170 ExtraInfo->setStage(Reg, RS_Spill); 1171 continue; 1172 } 1173 1174 // Global intervals. Allow repeated splitting as long as the number of live 1175 // blocks is strictly decreasing. 1176 if (IntvMap[I] < NumGlobalIntvs) { 1177 if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { 1178 LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks 1179 << " blocks as original.\n"); 1180 // Don't allow repeated splitting as a safe guard against looping. 1181 ExtraInfo->setStage(Reg, RS_Split2); 1182 } 1183 continue; 1184 } 1185 1186 // Other intervals are treated as new. This includes local intervals created 1187 // for blocks with multiple uses, and anything created by DCE. 1188 } 1189 1190 if (VerifyEnabled) 1191 MF->verify(LIS, Indexes, "After splitting live range around region", 1192 &errs()); 1193 } 1194 1195 MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg, 1196 AllocationOrder &Order, 1197 SmallVectorImpl<Register> &NewVRegs) { 1198 if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg)) 1199 return MCRegister::NoRegister; 1200 unsigned NumCands = 0; 1201 BlockFrequency SpillCost = calcSpillCost(); 1202 BlockFrequency BestCost; 1203 1204 // Check if we can split this live range around a compact region. 1205 bool HasCompact = calcCompactRegion(GlobalCand.front()); 1206 if (HasCompact) { 1207 // Yes, keep GlobalCand[0] as the compact region candidate. 1208 NumCands = 1; 1209 BestCost = BlockFrequency::max(); 1210 } else { 1211 // No benefit from the compact region, our fallback will be per-block 1212 // splitting. Make sure we find a solution that is cheaper than spilling. 1213 BestCost = SpillCost; 1214 LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = " 1215 << printBlockFreq(*MBFI, BestCost) << '\n'); 1216 } 1217 1218 unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, 1219 NumCands, false /*IgnoreCSR*/); 1220 1221 // No solutions found, fall back to single block splitting. 1222 if (!HasCompact && BestCand == NoCand) 1223 return MCRegister::NoRegister; 1224 1225 return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); 1226 } 1227 1228 unsigned 1229 RAGreedy::calculateRegionSplitCostAroundReg(MCPhysReg PhysReg, 1230 AllocationOrder &Order, 1231 BlockFrequency &BestCost, 1232 unsigned &NumCands, 1233 unsigned &BestCand) { 1234 // Discard bad candidates before we run out of interference cache cursors. 1235 // This will only affect register classes with a lot of registers (>32). 1236 if (NumCands == IntfCache.getMaxCursors()) { 1237 unsigned WorstCount = ~0u; 1238 unsigned Worst = 0; 1239 for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) { 1240 if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg) 1241 continue; 1242 unsigned Count = GlobalCand[CandIndex].LiveBundles.count(); 1243 if (Count < WorstCount) { 1244 Worst = CandIndex; 1245 WorstCount = Count; 1246 } 1247 } 1248 --NumCands; 1249 GlobalCand[Worst] = GlobalCand[NumCands]; 1250 if (BestCand == NumCands) 1251 BestCand = Worst; 1252 } 1253 1254 if (GlobalCand.size() <= NumCands) 1255 GlobalCand.resize(NumCands+1); 1256 GlobalSplitCandidate &Cand = GlobalCand[NumCands]; 1257 Cand.reset(IntfCache, PhysReg); 1258 1259 SpillPlacer->prepare(Cand.LiveBundles); 1260 BlockFrequency Cost; 1261 if (!addSplitConstraints(Cand.Intf, Cost)) { 1262 LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n"); 1263 return BestCand; 1264 } 1265 LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) 1266 << "\tstatic = " << printBlockFreq(*MBFI, Cost)); 1267 if (Cost >= BestCost) { 1268 LLVM_DEBUG({ 1269 if (BestCand == NoCand) 1270 dbgs() << " worse than no bundles\n"; 1271 else 1272 dbgs() << " worse than " 1273 << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; 1274 }); 1275 return BestCand; 1276 } 1277 if (!growRegion(Cand)) { 1278 LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n"); 1279 return BestCand; 1280 } 1281 1282 SpillPlacer->finish(); 1283 1284 // No live bundles, defer to splitSingleBlocks(). 1285 if (!Cand.LiveBundles.any()) { 1286 LLVM_DEBUG(dbgs() << " no bundles.\n"); 1287 return BestCand; 1288 } 1289 1290 Cost += calcGlobalSplitCost(Cand, Order); 1291 LLVM_DEBUG({ 1292 dbgs() << ", total = " << printBlockFreq(*MBFI, Cost) << " with bundles"; 1293 for (int I : Cand.LiveBundles.set_bits()) 1294 dbgs() << " EB#" << I; 1295 dbgs() << ".\n"; 1296 }); 1297 if (Cost < BestCost) { 1298 BestCand = NumCands; 1299 BestCost = Cost; 1300 } 1301 ++NumCands; 1302 1303 return BestCand; 1304 } 1305 1306 unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg, 1307 AllocationOrder &Order, 1308 BlockFrequency &BestCost, 1309 unsigned &NumCands, 1310 bool IgnoreCSR) { 1311 unsigned BestCand = NoCand; 1312 for (MCPhysReg PhysReg : Order) { 1313 assert(PhysReg); 1314 if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg)) 1315 continue; 1316 1317 calculateRegionSplitCostAroundReg(PhysReg, Order, BestCost, NumCands, 1318 BestCand); 1319 } 1320 1321 return BestCand; 1322 } 1323 1324 MCRegister RAGreedy::doRegionSplit(const LiveInterval &VirtReg, 1325 unsigned BestCand, bool HasCompact, 1326 SmallVectorImpl<Register> &NewVRegs) { 1327 SmallVector<unsigned, 8> UsedCands; 1328 // Prepare split editor. 1329 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); 1330 SE->reset(LREdit, SplitSpillMode); 1331 1332 // Assign all edge bundles to the preferred candidate, or NoCand. 1333 BundleCand.assign(Bundles->getNumBundles(), NoCand); 1334 1335 // Assign bundles for the best candidate region. 1336 if (BestCand != NoCand) { 1337 GlobalSplitCandidate &Cand = GlobalCand[BestCand]; 1338 if (unsigned B = Cand.getBundles(BundleCand, BestCand)) { 1339 UsedCands.push_back(BestCand); 1340 Cand.IntvIdx = SE->openIntv(); 1341 LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in " 1342 << B << " bundles, intv " << Cand.IntvIdx << ".\n"); 1343 (void)B; 1344 } 1345 } 1346 1347 // Assign bundles for the compact region. 1348 if (HasCompact) { 1349 GlobalSplitCandidate &Cand = GlobalCand.front(); 1350 assert(!Cand.PhysReg && "Compact region has no physreg"); 1351 if (unsigned B = Cand.getBundles(BundleCand, 0)) { 1352 UsedCands.push_back(0); 1353 Cand.IntvIdx = SE->openIntv(); 1354 LLVM_DEBUG(dbgs() << "Split for compact region in " << B 1355 << " bundles, intv " << Cand.IntvIdx << ".\n"); 1356 (void)B; 1357 } 1358 } 1359 1360 splitAroundRegion(LREdit, UsedCands); 1361 return MCRegister(); 1362 } 1363 1364 // VirtReg has a physical Hint, this function tries to split VirtReg around 1365 // Hint if we can place new COPY instructions in cold blocks. 1366 bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint, 1367 const LiveInterval &VirtReg, 1368 SmallVectorImpl<Register> &NewVRegs, 1369 AllocationOrder &Order) { 1370 // Split the VirtReg may generate COPY instructions in multiple cold basic 1371 // blocks, and increase code size. So we avoid it when the function is 1372 // optimized for size. 1373 if (MF->getFunction().hasOptSize()) 1374 return false; 1375 1376 // Don't allow repeated splitting as a safe guard against looping. 1377 if (ExtraInfo->getStage(VirtReg) >= RS_Split2) 1378 return false; 1379 1380 BlockFrequency Cost = BlockFrequency(0); 1381 Register Reg = VirtReg.reg(); 1382 1383 // Compute the cost of assigning a non Hint physical register to VirtReg. 1384 // We define it as the total frequency of broken COPY instructions to/from 1385 // Hint register, and after split, they can be deleted. 1386 for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) { 1387 if (!TII->isFullCopyInstr(Instr)) 1388 continue; 1389 Register OtherReg = Instr.getOperand(1).getReg(); 1390 if (OtherReg == Reg) { 1391 OtherReg = Instr.getOperand(0).getReg(); 1392 if (OtherReg == Reg) 1393 continue; 1394 // Check if VirtReg interferes with OtherReg after this COPY instruction. 1395 if (VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot())) 1396 continue; 1397 } 1398 MCRegister OtherPhysReg = 1399 OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg); 1400 if (OtherPhysReg == Hint) 1401 Cost += MBFI->getBlockFreq(Instr.getParent()); 1402 } 1403 1404 // Decrease the cost so it will be split in colder blocks. 1405 BranchProbability Threshold(SplitThresholdForRegWithHint, 100); 1406 Cost *= Threshold; 1407 if (Cost == BlockFrequency(0)) 1408 return false; 1409 1410 unsigned NumCands = 0; 1411 unsigned BestCand = NoCand; 1412 SA->analyze(&VirtReg); 1413 calculateRegionSplitCostAroundReg(Hint, Order, Cost, NumCands, BestCand); 1414 if (BestCand == NoCand) 1415 return false; 1416 1417 doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs); 1418 return true; 1419 } 1420 1421 //===----------------------------------------------------------------------===// 1422 // Per-Block Splitting 1423 //===----------------------------------------------------------------------===// 1424 1425 /// tryBlockSplit - Split a global live range around every block with uses. This 1426 /// creates a lot of local live ranges, that will be split by tryLocalSplit if 1427 /// they don't allocate. 1428 MCRegister RAGreedy::tryBlockSplit(const LiveInterval &VirtReg, 1429 AllocationOrder &Order, 1430 SmallVectorImpl<Register> &NewVRegs) { 1431 assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); 1432 Register Reg = VirtReg.reg(); 1433 bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); 1434 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); 1435 SE->reset(LREdit, SplitSpillMode); 1436 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1437 for (const SplitAnalysis::BlockInfo &BI : UseBlocks) { 1438 if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) 1439 SE->splitSingleBlock(BI); 1440 } 1441 // No blocks were split. 1442 if (LREdit.empty()) 1443 return MCRegister(); 1444 1445 // We did split for some blocks. 1446 SmallVector<unsigned, 8> IntvMap; 1447 SE->finish(&IntvMap); 1448 1449 // Tell LiveDebugVariables about the new ranges. 1450 DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); 1451 1452 // Sort out the new intervals created by splitting. The remainder interval 1453 // goes straight to spilling, the new local ranges get to stay RS_New. 1454 for (unsigned I = 0, E = LREdit.size(); I != E; ++I) { 1455 const LiveInterval &LI = LIS->getInterval(LREdit.get(I)); 1456 if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0) 1457 ExtraInfo->setStage(LI, RS_Spill); 1458 } 1459 1460 if (VerifyEnabled) 1461 MF->verify(LIS, Indexes, "After splitting live range around basic blocks", 1462 &errs()); 1463 return MCRegister(); 1464 } 1465 1466 //===----------------------------------------------------------------------===// 1467 // Per-Instruction Splitting 1468 //===----------------------------------------------------------------------===// 1469 1470 /// Get the number of allocatable registers that match the constraints of \p Reg 1471 /// on \p MI and that are also in \p SuperRC. 1472 static unsigned getNumAllocatableRegsForConstraints( 1473 const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC, 1474 const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, 1475 const RegisterClassInfo &RCI) { 1476 assert(SuperRC && "Invalid register class"); 1477 1478 const TargetRegisterClass *ConstrainedRC = 1479 MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI, 1480 /* ExploreBundle */ true); 1481 if (!ConstrainedRC) 1482 return 0; 1483 return RCI.getNumAllocatableRegs(ConstrainedRC); 1484 } 1485 1486 static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI, 1487 const TargetRegisterInfo &TRI, 1488 const MachineInstr &FirstMI, 1489 Register Reg) { 1490 LaneBitmask Mask; 1491 SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops; 1492 (void)AnalyzeVirtRegInBundle(const_cast<MachineInstr &>(FirstMI), Reg, &Ops); 1493 1494 for (auto [MI, OpIdx] : Ops) { 1495 const MachineOperand &MO = MI->getOperand(OpIdx); 1496 assert(MO.isReg() && MO.getReg() == Reg); 1497 unsigned SubReg = MO.getSubReg(); 1498 if (SubReg == 0 && MO.isUse()) { 1499 if (MO.isUndef()) 1500 continue; 1501 return MRI.getMaxLaneMaskForVReg(Reg); 1502 } 1503 1504 LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); 1505 if (MO.isDef()) { 1506 if (!MO.isUndef()) 1507 Mask |= ~SubRegMask; 1508 } else 1509 Mask |= SubRegMask; 1510 } 1511 1512 return Mask; 1513 } 1514 1515 /// Return true if \p MI at \P Use reads a subset of the lanes live in \p 1516 /// VirtReg. 1517 static bool readsLaneSubset(const MachineRegisterInfo &MRI, 1518 const MachineInstr *MI, const LiveInterval &VirtReg, 1519 const TargetRegisterInfo *TRI, SlotIndex Use, 1520 const TargetInstrInfo *TII) { 1521 // Early check the common case. Beware of the semi-formed bundles SplitKit 1522 // creates by setting the bundle flag on copies without a matching BUNDLE. 1523 1524 auto DestSrc = TII->isCopyInstr(*MI); 1525 if (DestSrc && !MI->isBundled() && 1526 DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg()) 1527 return false; 1528 1529 // FIXME: We're only considering uses, but should be consider defs too? 1530 LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg()); 1531 1532 LaneBitmask LiveAtMask; 1533 for (const LiveInterval::SubRange &S : VirtReg.subranges()) { 1534 if (S.liveAt(Use)) 1535 LiveAtMask |= S.LaneMask; 1536 } 1537 1538 // If the live lanes aren't different from the lanes used by the instruction, 1539 // this doesn't help. 1540 return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any(); 1541 } 1542 1543 /// tryInstructionSplit - Split a live range around individual instructions. 1544 /// This is normally not worthwhile since the spiller is doing essentially the 1545 /// same thing. However, when the live range is in a constrained register 1546 /// class, it may help to insert copies such that parts of the live range can 1547 /// be moved to a larger register class. 1548 /// 1549 /// This is similar to spilling to a larger register class. 1550 MCRegister RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg, 1551 AllocationOrder &Order, 1552 SmallVectorImpl<Register> &NewVRegs) { 1553 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg()); 1554 // There is no point to this if there are no larger sub-classes. 1555 1556 bool SplitSubClass = true; 1557 if (!RegClassInfo.isProperSubClass(CurRC)) { 1558 if (!VirtReg.hasSubRanges()) 1559 return MCRegister(); 1560 SplitSubClass = false; 1561 } 1562 1563 // Always enable split spill mode, since we're effectively spilling to a 1564 // register. 1565 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); 1566 SE->reset(LREdit, SplitEditor::SM_Size); 1567 1568 ArrayRef<SlotIndex> Uses = SA->getUseSlots(); 1569 if (Uses.size() <= 1) 1570 return MCRegister(); 1571 1572 LLVM_DEBUG(dbgs() << "Split around " << Uses.size() 1573 << " individual instrs.\n"); 1574 1575 const TargetRegisterClass *SuperRC = 1576 TRI->getLargestLegalSuperClass(CurRC, *MF); 1577 unsigned SuperRCNumAllocatableRegs = 1578 RegClassInfo.getNumAllocatableRegs(SuperRC); 1579 // Split around every non-copy instruction if this split will relax 1580 // the constraints on the virtual register. 1581 // Otherwise, splitting just inserts uncoalescable copies that do not help 1582 // the allocation. 1583 for (const SlotIndex Use : Uses) { 1584 if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) { 1585 if (TII->isFullCopyInstr(*MI) || 1586 (SplitSubClass && 1587 SuperRCNumAllocatableRegs == 1588 getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC, 1589 TII, TRI, RegClassInfo)) || 1590 // TODO: Handle split for subranges with subclass constraints? 1591 (!SplitSubClass && VirtReg.hasSubRanges() && 1592 !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) { 1593 LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI); 1594 continue; 1595 } 1596 } 1597 SE->openIntv(); 1598 SlotIndex SegStart = SE->enterIntvBefore(Use); 1599 SlotIndex SegStop = SE->leaveIntvAfter(Use); 1600 SE->useIntv(SegStart, SegStop); 1601 } 1602 1603 if (LREdit.empty()) { 1604 LLVM_DEBUG(dbgs() << "All uses were copies.\n"); 1605 return MCRegister(); 1606 } 1607 1608 SmallVector<unsigned, 8> IntvMap; 1609 SE->finish(&IntvMap); 1610 DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS); 1611 // Assign all new registers to RS_Spill. This was the last chance. 1612 ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill); 1613 return MCRegister(); 1614 } 1615 1616 //===----------------------------------------------------------------------===// 1617 // Local Splitting 1618 //===----------------------------------------------------------------------===// 1619 1620 /// calcGapWeights - Compute the maximum spill weight that needs to be evicted 1621 /// in order to use PhysReg between two entries in SA->UseSlots. 1622 /// 1623 /// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1]. 1624 /// 1625 void RAGreedy::calcGapWeights(MCRegister PhysReg, 1626 SmallVectorImpl<float> &GapWeight) { 1627 assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); 1628 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); 1629 ArrayRef<SlotIndex> Uses = SA->getUseSlots(); 1630 const unsigned NumGaps = Uses.size()-1; 1631 1632 // Start and end points for the interference check. 1633 SlotIndex StartIdx = 1634 BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr; 1635 SlotIndex StopIdx = 1636 BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr; 1637 1638 GapWeight.assign(NumGaps, 0.0f); 1639 1640 // Add interference from each overlapping register. 1641 for (MCRegUnit Unit : TRI->regunits(PhysReg)) { 1642 if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), Unit) 1643 .checkInterference()) 1644 continue; 1645 1646 // We know that VirtReg is a continuous interval from FirstInstr to 1647 // LastInstr, so we don't need InterferenceQuery. 1648 // 1649 // Interference that overlaps an instruction is counted in both gaps 1650 // surrounding the instruction. The exception is interference before 1651 // StartIdx and after StopIdx. 1652 // 1653 LiveIntervalUnion::SegmentIter IntI = 1654 Matrix->getLiveUnions()[Unit].find(StartIdx); 1655 for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { 1656 // Skip the gaps before IntI. 1657 while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) 1658 if (++Gap == NumGaps) 1659 break; 1660 if (Gap == NumGaps) 1661 break; 1662 1663 // Update the gaps covered by IntI. 1664 const float weight = IntI.value()->weight(); 1665 for (; Gap != NumGaps; ++Gap) { 1666 GapWeight[Gap] = std::max(GapWeight[Gap], weight); 1667 if (Uses[Gap+1].getBaseIndex() >= IntI.stop()) 1668 break; 1669 } 1670 if (Gap == NumGaps) 1671 break; 1672 } 1673 } 1674 1675 // Add fixed interference. 1676 for (MCRegUnit Unit : TRI->regunits(PhysReg)) { 1677 const LiveRange &LR = LIS->getRegUnit(Unit); 1678 LiveRange::const_iterator I = LR.find(StartIdx); 1679 LiveRange::const_iterator E = LR.end(); 1680 1681 // Same loop as above. Mark any overlapped gaps as HUGE_VALF. 1682 for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) { 1683 while (Uses[Gap+1].getBoundaryIndex() < I->start) 1684 if (++Gap == NumGaps) 1685 break; 1686 if (Gap == NumGaps) 1687 break; 1688 1689 for (; Gap != NumGaps; ++Gap) { 1690 GapWeight[Gap] = huge_valf; 1691 if (Uses[Gap+1].getBaseIndex() >= I->end) 1692 break; 1693 } 1694 if (Gap == NumGaps) 1695 break; 1696 } 1697 } 1698 } 1699 1700 /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only 1701 /// basic block. 1702 /// 1703 MCRegister RAGreedy::tryLocalSplit(const LiveInterval &VirtReg, 1704 AllocationOrder &Order, 1705 SmallVectorImpl<Register> &NewVRegs) { 1706 // TODO: the function currently only handles a single UseBlock; it should be 1707 // possible to generalize. 1708 if (SA->getUseBlocks().size() != 1) 1709 return MCRegister(); 1710 1711 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); 1712 1713 // Note that it is possible to have an interval that is live-in or live-out 1714 // while only covering a single block - A phi-def can use undef values from 1715 // predecessors, and the block could be a single-block loop. 1716 // We don't bother doing anything clever about such a case, we simply assume 1717 // that the interval is continuous from FirstInstr to LastInstr. We should 1718 // make sure that we don't do anything illegal to such an interval, though. 1719 1720 ArrayRef<SlotIndex> Uses = SA->getUseSlots(); 1721 if (Uses.size() <= 2) 1722 return MCRegister(); 1723 const unsigned NumGaps = Uses.size()-1; 1724 1725 LLVM_DEBUG({ 1726 dbgs() << "tryLocalSplit: "; 1727 for (const auto &Use : Uses) 1728 dbgs() << ' ' << Use; 1729 dbgs() << '\n'; 1730 }); 1731 1732 // If VirtReg is live across any register mask operands, compute a list of 1733 // gaps with register masks. 1734 SmallVector<unsigned, 8> RegMaskGaps; 1735 if (Matrix->checkRegMaskInterference(VirtReg)) { 1736 // Get regmask slots for the whole block. 1737 ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); 1738 LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:"); 1739 // Constrain to VirtReg's live range. 1740 unsigned RI = 1741 llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin(); 1742 unsigned RE = RMS.size(); 1743 for (unsigned I = 0; I != NumGaps && RI != RE; ++I) { 1744 // Look for Uses[I] <= RMS <= Uses[I + 1]. 1745 assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I])); 1746 if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI])) 1747 continue; 1748 // Skip a regmask on the same instruction as the last use. It doesn't 1749 // overlap the live range. 1750 if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps) 1751 break; 1752 LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-' 1753 << Uses[I + 1]); 1754 RegMaskGaps.push_back(I); 1755 // Advance ri to the next gap. A regmask on one of the uses counts in 1756 // both gaps. 1757 while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1])) 1758 ++RI; 1759 } 1760 LLVM_DEBUG(dbgs() << '\n'); 1761 } 1762 1763 // Since we allow local split results to be split again, there is a risk of 1764 // creating infinite loops. It is tempting to require that the new live 1765 // ranges have less instructions than the original. That would guarantee 1766 // convergence, but it is too strict. A live range with 3 instructions can be 1767 // split 2+3 (including the COPY), and we want to allow that. 1768 // 1769 // Instead we use these rules: 1770 // 1771 // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the 1772 // noop split, of course). 1773 // 2. Require progress be made for ranges with getStage() == RS_Split2. All 1774 // the new ranges must have fewer instructions than before the split. 1775 // 3. New ranges with the same number of instructions are marked RS_Split2, 1776 // smaller ranges are marked RS_New. 1777 // 1778 // These rules allow a 3 -> 2+3 split once, which we need. They also prevent 1779 // excessive splitting and infinite loops. 1780 // 1781 bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2; 1782 1783 // Best split candidate. 1784 unsigned BestBefore = NumGaps; 1785 unsigned BestAfter = 0; 1786 float BestDiff = 0; 1787 1788 const float blockFreq = 1789 SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() * 1790 (1.0f / MBFI->getEntryFreq().getFrequency()); 1791 SmallVector<float, 8> GapWeight; 1792 1793 for (MCPhysReg PhysReg : Order) { 1794 assert(PhysReg); 1795 // Keep track of the largest spill weight that would need to be evicted in 1796 // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1]. 1797 calcGapWeights(PhysReg, GapWeight); 1798 1799 // Remove any gaps with regmask clobbers. 1800 if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) 1801 for (unsigned Gap : RegMaskGaps) 1802 GapWeight[Gap] = huge_valf; 1803 1804 // Try to find the best sequence of gaps to close. 1805 // The new spill weight must be larger than any gap interference. 1806 1807 // We will split before Uses[SplitBefore] and after Uses[SplitAfter]. 1808 unsigned SplitBefore = 0, SplitAfter = 1; 1809 1810 // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]). 1811 // It is the spill weight that needs to be evicted. 1812 float MaxGap = GapWeight[0]; 1813 1814 while (true) { 1815 // Live before/after split? 1816 const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; 1817 const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; 1818 1819 LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore] 1820 << '-' << Uses[SplitAfter] << " I=" << MaxGap); 1821 1822 // Stop before the interval gets so big we wouldn't be making progress. 1823 if (!LiveBefore && !LiveAfter) { 1824 LLVM_DEBUG(dbgs() << " all\n"); 1825 break; 1826 } 1827 // Should the interval be extended or shrunk? 1828 bool Shrink = true; 1829 1830 // How many gaps would the new range have? 1831 unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter; 1832 1833 // Legally, without causing looping? 1834 bool Legal = !ProgressRequired || NewGaps < NumGaps; 1835 1836 if (Legal && MaxGap < huge_valf) { 1837 // Estimate the new spill weight. Each instruction reads or writes the 1838 // register. Conservatively assume there are no read-modify-write 1839 // instructions. 1840 // 1841 // Try to guess the size of the new interval. 1842 const float EstWeight = normalizeSpillWeight( 1843 blockFreq * (NewGaps + 1), 1844 Uses[SplitBefore].distance(Uses[SplitAfter]) + 1845 (LiveBefore + LiveAfter) * SlotIndex::InstrDist, 1846 1); 1847 // Would this split be possible to allocate? 1848 // Never allocate all gaps, we wouldn't be making progress. 1849 LLVM_DEBUG(dbgs() << " w=" << EstWeight); 1850 if (EstWeight * Hysteresis >= MaxGap) { 1851 Shrink = false; 1852 float Diff = EstWeight - MaxGap; 1853 if (Diff > BestDiff) { 1854 LLVM_DEBUG(dbgs() << " (best)"); 1855 BestDiff = Hysteresis * Diff; 1856 BestBefore = SplitBefore; 1857 BestAfter = SplitAfter; 1858 } 1859 } 1860 } 1861 1862 // Try to shrink. 1863 if (Shrink) { 1864 if (++SplitBefore < SplitAfter) { 1865 LLVM_DEBUG(dbgs() << " shrink\n"); 1866 // Recompute the max when necessary. 1867 if (GapWeight[SplitBefore - 1] >= MaxGap) { 1868 MaxGap = GapWeight[SplitBefore]; 1869 for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I) 1870 MaxGap = std::max(MaxGap, GapWeight[I]); 1871 } 1872 continue; 1873 } 1874 MaxGap = 0; 1875 } 1876 1877 // Try to extend the interval. 1878 if (SplitAfter >= NumGaps) { 1879 LLVM_DEBUG(dbgs() << " end\n"); 1880 break; 1881 } 1882 1883 LLVM_DEBUG(dbgs() << " extend\n"); 1884 MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]); 1885 } 1886 } 1887 1888 // Didn't find any candidates? 1889 if (BestBefore == NumGaps) 1890 return MCRegister(); 1891 1892 LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-' 1893 << Uses[BestAfter] << ", " << BestDiff << ", " 1894 << (BestAfter - BestBefore + 1) << " instrs\n"); 1895 1896 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); 1897 SE->reset(LREdit); 1898 1899 SE->openIntv(); 1900 SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); 1901 SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); 1902 SE->useIntv(SegStart, SegStop); 1903 SmallVector<unsigned, 8> IntvMap; 1904 SE->finish(&IntvMap); 1905 DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS); 1906 // If the new range has the same number of instructions as before, mark it as 1907 // RS_Split2 so the next split will be forced to make progress. Otherwise, 1908 // leave the new intervals as RS_New so they can compete. 1909 bool LiveBefore = BestBefore != 0 || BI.LiveIn; 1910 bool LiveAfter = BestAfter != NumGaps || BI.LiveOut; 1911 unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter; 1912 if (NewGaps >= NumGaps) { 1913 LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:"); 1914 assert(!ProgressRequired && "Didn't make progress when it was required."); 1915 for (unsigned I = 0, E = IntvMap.size(); I != E; ++I) 1916 if (IntvMap[I] == 1) { 1917 ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2); 1918 LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I))); 1919 } 1920 LLVM_DEBUG(dbgs() << '\n'); 1921 } 1922 ++NumLocalSplits; 1923 1924 return MCRegister(); 1925 } 1926 1927 //===----------------------------------------------------------------------===// 1928 // Live Range Splitting 1929 //===----------------------------------------------------------------------===// 1930 1931 /// trySplit - Try to split VirtReg or one of its interferences, making it 1932 /// assignable. 1933 /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. 1934 MCRegister RAGreedy::trySplit(const LiveInterval &VirtReg, 1935 AllocationOrder &Order, 1936 SmallVectorImpl<Register> &NewVRegs, 1937 const SmallVirtRegSet &FixedRegisters) { 1938 // Ranges must be Split2 or less. 1939 if (ExtraInfo->getStage(VirtReg) >= RS_Spill) 1940 return MCRegister(); 1941 1942 // Local intervals are handled separately. 1943 if (LIS->intervalIsInOneMBB(VirtReg)) { 1944 NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName, 1945 TimerGroupDescription, TimePassesIsEnabled); 1946 SA->analyze(&VirtReg); 1947 MCRegister PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); 1948 if (PhysReg || !NewVRegs.empty()) 1949 return PhysReg; 1950 return tryInstructionSplit(VirtReg, Order, NewVRegs); 1951 } 1952 1953 NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName, 1954 TimerGroupDescription, TimePassesIsEnabled); 1955 1956 SA->analyze(&VirtReg); 1957 1958 // First try to split around a region spanning multiple blocks. RS_Split2 1959 // ranges already made dubious progress with region splitting, so they go 1960 // straight to single block splitting. 1961 if (ExtraInfo->getStage(VirtReg) < RS_Split2) { 1962 MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); 1963 if (PhysReg || !NewVRegs.empty()) 1964 return PhysReg; 1965 } 1966 1967 // Then isolate blocks. 1968 return tryBlockSplit(VirtReg, Order, NewVRegs); 1969 } 1970 1971 //===----------------------------------------------------------------------===// 1972 // Last Chance Recoloring 1973 //===----------------------------------------------------------------------===// 1974 1975 /// Return true if \p reg has any tied def operand. 1976 static bool hasTiedDef(MachineRegisterInfo *MRI, Register reg) { 1977 for (const MachineOperand &MO : MRI->def_operands(reg)) 1978 if (MO.isTied()) 1979 return true; 1980 1981 return false; 1982 } 1983 1984 /// Return true if the existing assignment of \p Intf overlaps, but is not the 1985 /// same, as \p PhysReg. 1986 static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI, 1987 const VirtRegMap &VRM, 1988 MCRegister PhysReg, 1989 const LiveInterval &Intf) { 1990 MCRegister AssignedReg = VRM.getPhys(Intf.reg()); 1991 if (PhysReg == AssignedReg) 1992 return false; 1993 return TRI.regsOverlap(PhysReg, AssignedReg); 1994 } 1995 1996 /// mayRecolorAllInterferences - Check if the virtual registers that 1997 /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be 1998 /// recolored to free \p PhysReg. 1999 /// When true is returned, \p RecoloringCandidates has been augmented with all 2000 /// the live intervals that need to be recolored in order to free \p PhysReg 2001 /// for \p VirtReg. 2002 /// \p FixedRegisters contains all the virtual registers that cannot be 2003 /// recolored. 2004 bool RAGreedy::mayRecolorAllInterferences( 2005 MCRegister PhysReg, const LiveInterval &VirtReg, 2006 SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) { 2007 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg()); 2008 2009 for (MCRegUnit Unit : TRI->regunits(PhysReg)) { 2010 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit); 2011 // If there is LastChanceRecoloringMaxInterference or more interferences, 2012 // chances are one would not be recolorable. 2013 if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >= 2014 LastChanceRecoloringMaxInterference && 2015 !ExhaustiveSearch) { 2016 LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n"); 2017 CutOffInfo |= CO_Interf; 2018 return false; 2019 } 2020 for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) { 2021 // If Intf is done and sits on the same register class as VirtReg, it 2022 // would not be recolorable as it is in the same state as 2023 // VirtReg. However there are at least two exceptions. 2024 // 2025 // If VirtReg has tied defs and Intf doesn't, then 2026 // there is still a point in examining if it can be recolorable. 2027 // 2028 // Additionally, if the register class has overlapping tuple members, it 2029 // may still be recolorable using a different tuple. This is more likely 2030 // if the existing assignment aliases with the candidate. 2031 // 2032 if (((ExtraInfo->getStage(*Intf) == RS_Done && 2033 MRI->getRegClass(Intf->reg()) == CurRC && 2034 !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) && 2035 !(hasTiedDef(MRI, VirtReg.reg()) && 2036 !hasTiedDef(MRI, Intf->reg()))) || 2037 FixedRegisters.count(Intf->reg())) { 2038 LLVM_DEBUG( 2039 dbgs() << "Early abort: the interference is not recolorable.\n"); 2040 return false; 2041 } 2042 RecoloringCandidates.insert(Intf); 2043 } 2044 } 2045 return true; 2046 } 2047 2048 /// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring 2049 /// its interferences. 2050 /// Last chance recoloring chooses a color for \p VirtReg and recolors every 2051 /// virtual register that was using it. The recoloring process may recursively 2052 /// use the last chance recoloring. Therefore, when a virtual register has been 2053 /// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot 2054 /// be last-chance-recolored again during this recoloring "session". 2055 /// E.g., 2056 /// Let 2057 /// vA can use {R1, R2 } 2058 /// vB can use { R2, R3} 2059 /// vC can use {R1 } 2060 /// Where vA, vB, and vC cannot be split anymore (they are reloads for 2061 /// instance) and they all interfere. 2062 /// 2063 /// vA is assigned R1 2064 /// vB is assigned R2 2065 /// vC tries to evict vA but vA is already done. 2066 /// Regular register allocation fails. 2067 /// 2068 /// Last chance recoloring kicks in: 2069 /// vC does as if vA was evicted => vC uses R1. 2070 /// vC is marked as fixed. 2071 /// vA needs to find a color. 2072 /// None are available. 2073 /// vA cannot evict vC: vC is a fixed virtual register now. 2074 /// vA does as if vB was evicted => vA uses R2. 2075 /// vB needs to find a color. 2076 /// R3 is available. 2077 /// Recoloring => vC = R1, vA = R2, vB = R3 2078 /// 2079 /// \p Order defines the preferred allocation order for \p VirtReg. 2080 /// \p NewRegs will contain any new virtual register that have been created 2081 /// (split, spill) during the process and that must be assigned. 2082 /// \p FixedRegisters contains all the virtual registers that cannot be 2083 /// recolored. 2084 /// 2085 /// \p RecolorStack tracks the original assignments of successfully recolored 2086 /// registers. 2087 /// 2088 /// \p Depth gives the current depth of the last chance recoloring. 2089 /// \return a physical register that can be used for VirtReg or ~0u if none 2090 /// exists. 2091 MCRegister RAGreedy::tryLastChanceRecoloring( 2092 const LiveInterval &VirtReg, AllocationOrder &Order, 2093 SmallVectorImpl<Register> &NewVRegs, SmallVirtRegSet &FixedRegisters, 2094 RecoloringStack &RecolorStack, unsigned Depth) { 2095 if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg)) 2096 return ~0u; 2097 2098 LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); 2099 2100 const ssize_t EntryStackSize = RecolorStack.size(); 2101 2102 // Ranges must be Done. 2103 assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && 2104 "Last chance recoloring should really be last chance"); 2105 // Set the max depth to LastChanceRecoloringMaxDepth. 2106 // We may want to reconsider that if we end up with a too large search space 2107 // for target with hundreds of registers. 2108 // Indeed, in that case we may want to cut the search space earlier. 2109 if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) { 2110 LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n"); 2111 CutOffInfo |= CO_Depth; 2112 return ~0u; 2113 } 2114 2115 // Set of Live intervals that will need to be recolored. 2116 SmallLISet RecoloringCandidates; 2117 2118 // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in 2119 // this recoloring "session". 2120 assert(!FixedRegisters.count(VirtReg.reg())); 2121 FixedRegisters.insert(VirtReg.reg()); 2122 SmallVector<Register, 4> CurrentNewVRegs; 2123 2124 for (MCRegister PhysReg : Order) { 2125 assert(PhysReg.isValid()); 2126 LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " 2127 << printReg(PhysReg, TRI) << '\n'); 2128 RecoloringCandidates.clear(); 2129 CurrentNewVRegs.clear(); 2130 2131 // It is only possible to recolor virtual register interference. 2132 if (Matrix->checkInterference(VirtReg, PhysReg) > 2133 LiveRegMatrix::IK_VirtReg) { 2134 LLVM_DEBUG( 2135 dbgs() << "Some interferences are not with virtual registers.\n"); 2136 2137 continue; 2138 } 2139 2140 // Early give up on this PhysReg if it is obvious we cannot recolor all 2141 // the interferences. 2142 if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates, 2143 FixedRegisters)) { 2144 LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n"); 2145 continue; 2146 } 2147 2148 // RecoloringCandidates contains all the virtual registers that interfere 2149 // with VirtReg on PhysReg (or one of its aliases). Enqueue them for 2150 // recoloring and perform the actual recoloring. 2151 PQueue RecoloringQueue; 2152 for (const LiveInterval *RC : RecoloringCandidates) { 2153 Register ItVirtReg = RC->reg(); 2154 enqueue(RecoloringQueue, RC); 2155 assert(VRM->hasPhys(ItVirtReg) && 2156 "Interferences are supposed to be with allocated variables"); 2157 2158 // Record the current allocation. 2159 RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg))); 2160 2161 // unset the related struct. 2162 Matrix->unassign(*RC); 2163 } 2164 2165 // Do as if VirtReg was assigned to PhysReg so that the underlying 2166 // recoloring has the right information about the interferes and 2167 // available colors. 2168 Matrix->assign(VirtReg, PhysReg); 2169 2170 // VirtReg may be deleted during tryRecoloringCandidates, save a copy. 2171 Register ThisVirtReg = VirtReg.reg(); 2172 2173 // Save the current recoloring state. 2174 // If we cannot recolor all the interferences, we will have to start again 2175 // at this point for the next physical register. 2176 SmallVirtRegSet SaveFixedRegisters(FixedRegisters); 2177 if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs, 2178 FixedRegisters, RecolorStack, Depth)) { 2179 // Push the queued vregs into the main queue. 2180 llvm::append_range(NewVRegs, CurrentNewVRegs); 2181 // Do not mess up with the global assignment process. 2182 // I.e., VirtReg must be unassigned. 2183 if (VRM->hasPhys(ThisVirtReg)) { 2184 Matrix->unassign(VirtReg); 2185 return PhysReg; 2186 } 2187 2188 // It is possible VirtReg will be deleted during tryRecoloringCandidates. 2189 LLVM_DEBUG(dbgs() << "tryRecoloringCandidates deleted a fixed register " 2190 << printReg(ThisVirtReg) << '\n'); 2191 FixedRegisters.erase(ThisVirtReg); 2192 return MCRegister(); 2193 } 2194 2195 LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " 2196 << printReg(PhysReg, TRI) << '\n'); 2197 2198 // The recoloring attempt failed, undo the changes. 2199 FixedRegisters = SaveFixedRegisters; 2200 Matrix->unassign(VirtReg); 2201 2202 // For a newly created vreg which is also in RecoloringCandidates, 2203 // don't add it to NewVRegs because its physical register will be restored 2204 // below. Other vregs in CurrentNewVRegs are created by calling 2205 // selectOrSplit and should be added into NewVRegs. 2206 for (Register R : CurrentNewVRegs) { 2207 if (RecoloringCandidates.count(&LIS->getInterval(R))) 2208 continue; 2209 NewVRegs.push_back(R); 2210 } 2211 2212 // Roll back our unsuccessful recoloring. Also roll back any successful 2213 // recolorings in any recursive recoloring attempts, since it's possible 2214 // they would have introduced conflicts with assignments we will be 2215 // restoring further up the stack. Perform all unassignments prior to 2216 // reassigning, since sub-recolorings may have conflicted with the registers 2217 // we are going to restore to their original assignments. 2218 for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) { 2219 const LiveInterval *LI; 2220 MCRegister PhysReg; 2221 std::tie(LI, PhysReg) = RecolorStack[I]; 2222 2223 if (VRM->hasPhys(LI->reg())) 2224 Matrix->unassign(*LI); 2225 } 2226 2227 for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) { 2228 const LiveInterval *LI; 2229 MCRegister PhysReg; 2230 std::tie(LI, PhysReg) = RecolorStack[I]; 2231 if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg())) 2232 Matrix->assign(*LI, PhysReg); 2233 } 2234 2235 // Pop the stack of recoloring attempts. 2236 RecolorStack.resize(EntryStackSize); 2237 } 2238 2239 // Last chance recoloring did not worked either, give up. 2240 return ~0u; 2241 } 2242 2243 /// tryRecoloringCandidates - Try to assign a new color to every register 2244 /// in \RecoloringQueue. 2245 /// \p NewRegs will contain any new virtual register created during the 2246 /// recoloring process. 2247 /// \p FixedRegisters[in/out] contains all the registers that have been 2248 /// recolored. 2249 /// \return true if all virtual registers in RecoloringQueue were successfully 2250 /// recolored, false otherwise. 2251 bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, 2252 SmallVectorImpl<Register> &NewVRegs, 2253 SmallVirtRegSet &FixedRegisters, 2254 RecoloringStack &RecolorStack, 2255 unsigned Depth) { 2256 while (!RecoloringQueue.empty()) { 2257 const LiveInterval *LI = dequeue(RecoloringQueue); 2258 LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); 2259 MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, 2260 RecolorStack, Depth + 1); 2261 // When splitting happens, the live-range may actually be empty. 2262 // In that case, this is okay to continue the recoloring even 2263 // if we did not find an alternative color for it. Indeed, 2264 // there will not be anything to color for LI in the end. 2265 if (PhysReg == ~0u || (!PhysReg && !LI->empty())) 2266 return false; 2267 2268 if (!PhysReg) { 2269 assert(LI->empty() && "Only empty live-range do not require a register"); 2270 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI 2271 << " succeeded. Empty LI.\n"); 2272 continue; 2273 } 2274 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI 2275 << " succeeded with: " << printReg(PhysReg, TRI) << '\n'); 2276 2277 Matrix->assign(*LI, PhysReg); 2278 FixedRegisters.insert(LI->reg()); 2279 } 2280 return true; 2281 } 2282 2283 //===----------------------------------------------------------------------===// 2284 // Main Entry Point 2285 //===----------------------------------------------------------------------===// 2286 2287 MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg, 2288 SmallVectorImpl<Register> &NewVRegs) { 2289 CutOffInfo = CO_None; 2290 LLVMContext &Ctx = MF->getFunction().getContext(); 2291 SmallVirtRegSet FixedRegisters; 2292 RecoloringStack RecolorStack; 2293 MCRegister Reg = 2294 selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack); 2295 if (Reg == ~0U && (CutOffInfo != CO_None)) { 2296 uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); 2297 if (CutOffEncountered == CO_Depth) 2298 Ctx.emitError("register allocation failed: maximum depth for recoloring " 2299 "reached. Use -fexhaustive-register-search to skip " 2300 "cutoffs"); 2301 else if (CutOffEncountered == CO_Interf) 2302 Ctx.emitError("register allocation failed: maximum interference for " 2303 "recoloring reached. Use -fexhaustive-register-search " 2304 "to skip cutoffs"); 2305 else if (CutOffEncountered == (CO_Depth | CO_Interf)) 2306 Ctx.emitError("register allocation failed: maximum interference and " 2307 "depth for recoloring reached. Use " 2308 "-fexhaustive-register-search to skip cutoffs"); 2309 } 2310 return Reg; 2311 } 2312 2313 /// Using a CSR for the first time has a cost because it causes push|pop 2314 /// to be added to prologue|epilogue. Splitting a cold section of the live 2315 /// range can have lower cost than using the CSR for the first time; 2316 /// Spilling a live range in the cold path can have lower cost than using 2317 /// the CSR for the first time. Returns the physical register if we decide 2318 /// to use the CSR; otherwise return MCRegister(). 2319 MCRegister RAGreedy::tryAssignCSRFirstTime( 2320 const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg, 2321 uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) { 2322 if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { 2323 // We choose spill over using the CSR for the first time if the spill cost 2324 // is lower than CSRCost. 2325 SA->analyze(&VirtReg); 2326 if (calcSpillCost() >= CSRCost) 2327 return PhysReg; 2328 2329 // We are going to spill, set CostPerUseLimit to 1 to make sure that 2330 // we will not use a callee-saved register in tryEvict. 2331 CostPerUseLimit = 1; 2332 return MCRegister(); 2333 } 2334 if (ExtraInfo->getStage(VirtReg) < RS_Split) { 2335 // We choose pre-splitting over using the CSR for the first time if 2336 // the cost of splitting is lower than CSRCost. 2337 SA->analyze(&VirtReg); 2338 unsigned NumCands = 0; 2339 BlockFrequency BestCost = CSRCost; // Don't modify CSRCost. 2340 unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, 2341 NumCands, true /*IgnoreCSR*/); 2342 if (BestCand == NoCand) 2343 // Use the CSR if we can't find a region split below CSRCost. 2344 return PhysReg; 2345 2346 // Perform the actual pre-splitting. 2347 doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs); 2348 return MCRegister(); 2349 } 2350 return PhysReg; 2351 } 2352 2353 void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) { 2354 // Do not keep invalid information around. 2355 SetOfBrokenHints.remove(&LI); 2356 } 2357 2358 void RAGreedy::initializeCSRCost() { 2359 // We use the command-line option if it is explicitly set, otherwise use the 2360 // larger one out of the command-line option and the value reported by TRI. 2361 CSRCost = BlockFrequency( 2362 CSRFirstTimeCost.getNumOccurrences() 2363 ? CSRFirstTimeCost 2364 : std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); 2365 if (!CSRCost.getFrequency()) 2366 return; 2367 2368 // Raw cost is relative to Entry == 2^14; scale it appropriately. 2369 uint64_t ActualEntry = MBFI->getEntryFreq().getFrequency(); 2370 if (!ActualEntry) { 2371 CSRCost = BlockFrequency(0); 2372 return; 2373 } 2374 uint64_t FixedEntry = 1 << 14; 2375 if (ActualEntry < FixedEntry) 2376 CSRCost *= BranchProbability(ActualEntry, FixedEntry); 2377 else if (ActualEntry <= UINT32_MAX) 2378 // Invert the fraction and divide. 2379 CSRCost /= BranchProbability(FixedEntry, ActualEntry); 2380 else 2381 // Can't use BranchProbability in general, since it takes 32-bit numbers. 2382 CSRCost = 2383 BlockFrequency(CSRCost.getFrequency() * (ActualEntry / FixedEntry)); 2384 } 2385 2386 /// Collect the hint info for \p Reg. 2387 /// The results are stored into \p Out. 2388 /// \p Out is not cleared before being populated. 2389 void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) { 2390 for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) { 2391 if (!TII->isFullCopyInstr(Instr)) 2392 continue; 2393 // Look for the other end of the copy. 2394 Register OtherReg = Instr.getOperand(0).getReg(); 2395 if (OtherReg == Reg) { 2396 OtherReg = Instr.getOperand(1).getReg(); 2397 if (OtherReg == Reg) 2398 continue; 2399 } 2400 // Get the current assignment. 2401 MCRegister OtherPhysReg = 2402 OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg); 2403 // Push the collected information. 2404 Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg, 2405 OtherPhysReg)); 2406 } 2407 } 2408 2409 /// Using the given \p List, compute the cost of the broken hints if 2410 /// \p PhysReg was used. 2411 /// \return The cost of \p List for \p PhysReg. 2412 BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, 2413 MCRegister PhysReg) { 2414 BlockFrequency Cost = BlockFrequency(0); 2415 for (const HintInfo &Info : List) { 2416 if (Info.PhysReg != PhysReg) 2417 Cost += Info.Freq; 2418 } 2419 return Cost; 2420 } 2421 2422 /// Using the register assigned to \p VirtReg, try to recolor 2423 /// all the live ranges that are copy-related with \p VirtReg. 2424 /// The recoloring is then propagated to all the live-ranges that have 2425 /// been recolored and so on, until no more copies can be coalesced or 2426 /// it is not profitable. 2427 /// For a given live range, profitability is determined by the sum of the 2428 /// frequencies of the non-identity copies it would introduce with the old 2429 /// and new register. 2430 void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) { 2431 // We have a broken hint, check if it is possible to fix it by 2432 // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted 2433 // some register and PhysReg may be available for the other live-ranges. 2434 SmallSet<Register, 4> Visited; 2435 SmallVector<Register, 2> RecoloringCandidates; 2436 HintsInfo Info; 2437 Register Reg = VirtReg.reg(); 2438 MCRegister PhysReg = VRM->getPhys(Reg); 2439 // Start the recoloring algorithm from the input live-interval, then 2440 // it will propagate to the ones that are copy-related with it. 2441 Visited.insert(Reg); 2442 RecoloringCandidates.push_back(Reg); 2443 2444 LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI) 2445 << '(' << printReg(PhysReg, TRI) << ")\n"); 2446 2447 do { 2448 Reg = RecoloringCandidates.pop_back_val(); 2449 2450 // We cannot recolor physical register. 2451 if (Reg.isPhysical()) 2452 continue; 2453 2454 // This may be a skipped register. 2455 if (!VRM->hasPhys(Reg)) { 2456 assert(!shouldAllocateRegister(Reg) && 2457 "We have an unallocated variable which should have been handled"); 2458 continue; 2459 } 2460 2461 // Get the live interval mapped with this virtual register to be able 2462 // to check for the interference with the new color. 2463 LiveInterval &LI = LIS->getInterval(Reg); 2464 MCRegister CurrPhys = VRM->getPhys(Reg); 2465 // Check that the new color matches the register class constraints and 2466 // that it is free for this live range. 2467 if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) || 2468 Matrix->checkInterference(LI, PhysReg))) 2469 continue; 2470 2471 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI) 2472 << ") is recolorable.\n"); 2473 2474 // Gather the hint info. 2475 Info.clear(); 2476 collectHintInfo(Reg, Info); 2477 // Check if recoloring the live-range will increase the cost of the 2478 // non-identity copies. 2479 if (CurrPhys != PhysReg) { 2480 LLVM_DEBUG(dbgs() << "Checking profitability:\n"); 2481 BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys); 2482 BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg); 2483 LLVM_DEBUG(dbgs() << "Old Cost: " << printBlockFreq(*MBFI, OldCopiesCost) 2484 << "\nNew Cost: " 2485 << printBlockFreq(*MBFI, NewCopiesCost) << '\n'); 2486 if (OldCopiesCost < NewCopiesCost) { 2487 LLVM_DEBUG(dbgs() << "=> Not profitable.\n"); 2488 continue; 2489 } 2490 // At this point, the cost is either cheaper or equal. If it is 2491 // equal, we consider this is profitable because it may expose 2492 // more recoloring opportunities. 2493 LLVM_DEBUG(dbgs() << "=> Profitable.\n"); 2494 // Recolor the live-range. 2495 Matrix->unassign(LI); 2496 Matrix->assign(LI, PhysReg); 2497 } 2498 // Push all copy-related live-ranges to keep reconciling the broken 2499 // hints. 2500 for (const HintInfo &HI : Info) { 2501 if (Visited.insert(HI.Reg).second) 2502 RecoloringCandidates.push_back(HI.Reg); 2503 } 2504 } while (!RecoloringCandidates.empty()); 2505 } 2506 2507 /// Try to recolor broken hints. 2508 /// Broken hints may be repaired by recoloring when an evicted variable 2509 /// freed up a register for a larger live-range. 2510 /// Consider the following example: 2511 /// BB1: 2512 /// a = 2513 /// b = 2514 /// BB2: 2515 /// ... 2516 /// = b 2517 /// = a 2518 /// Let us assume b gets split: 2519 /// BB1: 2520 /// a = 2521 /// b = 2522 /// BB2: 2523 /// c = b 2524 /// ... 2525 /// d = c 2526 /// = d 2527 /// = a 2528 /// Because of how the allocation work, b, c, and d may be assigned different 2529 /// colors. Now, if a gets evicted later: 2530 /// BB1: 2531 /// a = 2532 /// st a, SpillSlot 2533 /// b = 2534 /// BB2: 2535 /// c = b 2536 /// ... 2537 /// d = c 2538 /// = d 2539 /// e = ld SpillSlot 2540 /// = e 2541 /// This is likely that we can assign the same register for b, c, and d, 2542 /// getting rid of 2 copies. 2543 void RAGreedy::tryHintsRecoloring() { 2544 for (const LiveInterval *LI : SetOfBrokenHints) { 2545 assert(LI->reg().isVirtual() && 2546 "Recoloring is possible only for virtual registers"); 2547 // Some dead defs may be around (e.g., because of debug uses). 2548 // Ignore those. 2549 if (!VRM->hasPhys(LI->reg())) 2550 continue; 2551 tryHintRecoloring(*LI); 2552 } 2553 } 2554 2555 MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg, 2556 SmallVectorImpl<Register> &NewVRegs, 2557 SmallVirtRegSet &FixedRegisters, 2558 RecoloringStack &RecolorStack, 2559 unsigned Depth) { 2560 uint8_t CostPerUseLimit = uint8_t(~0u); 2561 // First try assigning a free register. 2562 auto Order = 2563 AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); 2564 if (MCRegister PhysReg = 2565 tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) { 2566 // When NewVRegs is not empty, we may have made decisions such as evicting 2567 // a virtual register, go with the earlier decisions and use the physical 2568 // register. 2569 if (CSRCost.getFrequency() && 2570 EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) { 2571 MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, 2572 CostPerUseLimit, NewVRegs); 2573 if (CSRReg || !NewVRegs.empty()) 2574 // Return now if we decide to use a CSR or create new vregs due to 2575 // pre-splitting. 2576 return CSRReg; 2577 } else 2578 return PhysReg; 2579 } 2580 // Non empty NewVRegs means VirtReg has been split. 2581 if (!NewVRegs.empty()) 2582 return MCRegister(); 2583 2584 LiveRangeStage Stage = ExtraInfo->getStage(VirtReg); 2585 LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade " 2586 << ExtraInfo->getCascade(VirtReg.reg()) << '\n'); 2587 2588 // Try to evict a less worthy live range, but only for ranges from the primary 2589 // queue. The RS_Split ranges already failed to do this, and they should not 2590 // get a second chance until they have been split. 2591 if (Stage != RS_Split) { 2592 if (MCRegister PhysReg = 2593 tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit, 2594 FixedRegisters)) { 2595 Register Hint = MRI->getSimpleHint(VirtReg.reg()); 2596 // If VirtReg has a hint and that hint is broken record this 2597 // virtual register as a recoloring candidate for broken hint. 2598 // Indeed, since we evicted a variable in its neighborhood it is 2599 // likely we can at least partially recolor some of the 2600 // copy-related live-ranges. 2601 if (Hint && Hint != PhysReg) 2602 SetOfBrokenHints.insert(&VirtReg); 2603 return PhysReg; 2604 } 2605 } 2606 2607 assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs"); 2608 2609 // The first time we see a live range, don't try to split or spill. 2610 // Wait until the second time, when all smaller ranges have been allocated. 2611 // This gives a better picture of the interference to split around. 2612 if (Stage < RS_Split) { 2613 ExtraInfo->setStage(VirtReg, RS_Split); 2614 LLVM_DEBUG(dbgs() << "wait for second round\n"); 2615 NewVRegs.push_back(VirtReg.reg()); 2616 return MCRegister(); 2617 } 2618 2619 if (Stage < RS_Spill && !VirtReg.empty()) { 2620 // Try splitting VirtReg or interferences. 2621 unsigned NewVRegSizeBefore = NewVRegs.size(); 2622 MCRegister PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters); 2623 if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) 2624 return PhysReg; 2625 } 2626 2627 // If we couldn't allocate a register from spilling, there is probably some 2628 // invalid inline assembly. The base class will report it. 2629 if (Stage >= RS_Done || !VirtReg.isSpillable()) { 2630 return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, 2631 RecolorStack, Depth); 2632 } 2633 2634 // Finally spill VirtReg itself. 2635 NamedRegionTimer T("spill", "Spiller", TimerGroupName, 2636 TimerGroupDescription, TimePassesIsEnabled); 2637 LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); 2638 spiller().spill(LRE, &Order); 2639 ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); 2640 2641 // Tell LiveDebugVariables about the new ranges. Ranges not being covered by 2642 // the new regs are kept in LDV (still mapping to the old register), until 2643 // we rewrite spilled locations in LDV at a later stage. 2644 for (Register r : spiller().getSpilledRegs()) 2645 DebugVars->splitRegister(r, LRE.regs(), *LIS); 2646 for (Register r : spiller().getReplacedRegs()) 2647 DebugVars->splitRegister(r, LRE.regs(), *LIS); 2648 2649 if (VerifyEnabled) 2650 MF->verify(LIS, Indexes, "After spilling", &errs()); 2651 2652 // The live virtual register requesting allocation was spilled, so tell 2653 // the caller not to allocate anything during this round. 2654 return MCRegister(); 2655 } 2656 2657 void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) { 2658 using namespace ore; 2659 if (Spills) { 2660 R << NV("NumSpills", Spills) << " spills "; 2661 R << NV("TotalSpillsCost", SpillsCost) << " total spills cost "; 2662 } 2663 if (FoldedSpills) { 2664 R << NV("NumFoldedSpills", FoldedSpills) << " folded spills "; 2665 R << NV("TotalFoldedSpillsCost", FoldedSpillsCost) 2666 << " total folded spills cost "; 2667 } 2668 if (Reloads) { 2669 R << NV("NumReloads", Reloads) << " reloads "; 2670 R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost "; 2671 } 2672 if (FoldedReloads) { 2673 R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads "; 2674 R << NV("TotalFoldedReloadsCost", FoldedReloadsCost) 2675 << " total folded reloads cost "; 2676 } 2677 if (ZeroCostFoldedReloads) 2678 R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads) 2679 << " zero cost folded reloads "; 2680 if (Copies) { 2681 R << NV("NumVRCopies", Copies) << " virtual registers copies "; 2682 R << NV("TotalCopiesCost", CopiesCost) << " total copies cost "; 2683 } 2684 } 2685 2686 RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) { 2687 RAGreedyStats Stats; 2688 const MachineFrameInfo &MFI = MF->getFrameInfo(); 2689 int FI; 2690 2691 auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) { 2692 return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>( 2693 A->getPseudoValue())->getFrameIndex()); 2694 }; 2695 auto isPatchpointInstr = [](const MachineInstr &MI) { 2696 return MI.getOpcode() == TargetOpcode::PATCHPOINT || 2697 MI.getOpcode() == TargetOpcode::STACKMAP || 2698 MI.getOpcode() == TargetOpcode::STATEPOINT; 2699 }; 2700 for (MachineInstr &MI : MBB) { 2701 auto DestSrc = TII->isCopyInstr(MI); 2702 if (DestSrc) { 2703 const MachineOperand &Dest = *DestSrc->Destination; 2704 const MachineOperand &Src = *DestSrc->Source; 2705 Register SrcReg = Src.getReg(); 2706 Register DestReg = Dest.getReg(); 2707 // Only count `COPY`s with a virtual register as source or destination. 2708 if (SrcReg.isVirtual() || DestReg.isVirtual()) { 2709 if (SrcReg.isVirtual()) { 2710 SrcReg = VRM->getPhys(SrcReg); 2711 if (SrcReg && Src.getSubReg()) 2712 SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg()); 2713 } 2714 if (DestReg.isVirtual()) { 2715 DestReg = VRM->getPhys(DestReg); 2716 if (DestReg && Dest.getSubReg()) 2717 DestReg = TRI->getSubReg(DestReg, Dest.getSubReg()); 2718 } 2719 if (SrcReg != DestReg) 2720 ++Stats.Copies; 2721 } 2722 continue; 2723 } 2724 2725 SmallVector<const MachineMemOperand *, 2> Accesses; 2726 if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) { 2727 ++Stats.Reloads; 2728 continue; 2729 } 2730 if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) { 2731 ++Stats.Spills; 2732 continue; 2733 } 2734 if (TII->hasLoadFromStackSlot(MI, Accesses) && 2735 llvm::any_of(Accesses, isSpillSlotAccess)) { 2736 if (!isPatchpointInstr(MI)) { 2737 Stats.FoldedReloads += Accesses.size(); 2738 continue; 2739 } 2740 // For statepoint there may be folded and zero cost folded stack reloads. 2741 std::pair<unsigned, unsigned> NonZeroCostRange = 2742 TII->getPatchpointUnfoldableRange(MI); 2743 SmallSet<unsigned, 16> FoldedReloads; 2744 SmallSet<unsigned, 16> ZeroCostFoldedReloads; 2745 for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) { 2746 MachineOperand &MO = MI.getOperand(Idx); 2747 if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex())) 2748 continue; 2749 if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second) 2750 FoldedReloads.insert(MO.getIndex()); 2751 else 2752 ZeroCostFoldedReloads.insert(MO.getIndex()); 2753 } 2754 // If stack slot is used in folded reload it is not zero cost then. 2755 for (unsigned Slot : FoldedReloads) 2756 ZeroCostFoldedReloads.erase(Slot); 2757 Stats.FoldedReloads += FoldedReloads.size(); 2758 Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size(); 2759 continue; 2760 } 2761 Accesses.clear(); 2762 if (TII->hasStoreToStackSlot(MI, Accesses) && 2763 llvm::any_of(Accesses, isSpillSlotAccess)) { 2764 Stats.FoldedSpills += Accesses.size(); 2765 } 2766 } 2767 // Set cost of collected statistic by multiplication to relative frequency of 2768 // this basic block. 2769 float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB); 2770 Stats.ReloadsCost = RelFreq * Stats.Reloads; 2771 Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads; 2772 Stats.SpillsCost = RelFreq * Stats.Spills; 2773 Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills; 2774 Stats.CopiesCost = RelFreq * Stats.Copies; 2775 return Stats; 2776 } 2777 2778 RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) { 2779 RAGreedyStats Stats; 2780 2781 // Sum up the spill and reloads in subloops. 2782 for (MachineLoop *SubLoop : *L) 2783 Stats.add(reportStats(SubLoop)); 2784 2785 for (MachineBasicBlock *MBB : L->getBlocks()) 2786 // Handle blocks that were not included in subloops. 2787 if (Loops->getLoopFor(MBB) == L) 2788 Stats.add(computeStats(*MBB)); 2789 2790 if (!Stats.isEmpty()) { 2791 using namespace ore; 2792 2793 ORE->emit([&]() { 2794 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies", 2795 L->getStartLoc(), L->getHeader()); 2796 Stats.report(R); 2797 R << "generated in loop"; 2798 return R; 2799 }); 2800 } 2801 return Stats; 2802 } 2803 2804 void RAGreedy::reportStats() { 2805 if (!ORE->allowExtraAnalysis(DEBUG_TYPE)) 2806 return; 2807 RAGreedyStats Stats; 2808 for (MachineLoop *L : *Loops) 2809 Stats.add(reportStats(L)); 2810 // Process non-loop blocks. 2811 for (MachineBasicBlock &MBB : *MF) 2812 if (!Loops->getLoopFor(&MBB)) 2813 Stats.add(computeStats(MBB)); 2814 if (!Stats.isEmpty()) { 2815 using namespace ore; 2816 2817 ORE->emit([&]() { 2818 DebugLoc Loc; 2819 if (auto *SP = MF->getFunction().getSubprogram()) 2820 Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP); 2821 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc, 2822 &MF->front()); 2823 Stats.report(R); 2824 R << "generated in function"; 2825 return R; 2826 }); 2827 } 2828 } 2829 2830 bool RAGreedy::hasVirtRegAlloc() { 2831 for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { 2832 Register Reg = Register::index2VirtReg(I); 2833 if (MRI->reg_nodbg_empty(Reg)) 2834 continue; 2835 if (shouldAllocateRegister(Reg)) 2836 return true; 2837 } 2838 2839 return false; 2840 } 2841 2842 bool RAGreedy::run(MachineFunction &mf) { 2843 LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" 2844 << "********** Function: " << mf.getName() << '\n'); 2845 2846 MF = &mf; 2847 TII = MF->getSubtarget().getInstrInfo(); 2848 2849 if (VerifyEnabled) 2850 MF->verify(LIS, Indexes, "Before greedy register allocator", &errs()); 2851 2852 RegAllocBase::init(*this->VRM, *this->LIS, *this->Matrix); 2853 2854 // Early return if there is no virtual register to be allocated to a 2855 // physical register. 2856 if (!hasVirtRegAlloc()) 2857 return false; 2858 2859 // Renumber to get accurate and consistent results from 2860 // SlotIndexes::getApproxInstrDistance. 2861 Indexes->packIndexes(); 2862 2863 initializeCSRCost(); 2864 2865 RegCosts = TRI->getRegisterCosts(*MF); 2866 RegClassPriorityTrumpsGlobalness = 2867 GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences() 2868 ? GreedyRegClassPriorityTrumpsGlobalness 2869 : TRI->regClassPriorityTrumpsGlobalness(*MF); 2870 2871 ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences() 2872 ? GreedyReverseLocalAssignment 2873 : TRI->reverseLocalAssignment(); 2874 2875 ExtraInfo.emplace(); 2876 2877 EvictAdvisor = EvictProvider->getAdvisor(*MF, *this, MBFI, Loops); 2878 PriorityAdvisor = PriorityProvider->getAdvisor(*MF, *this, *Indexes); 2879 2880 VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI); 2881 SpillerInstance.reset(createInlineSpiller({*LIS, *LSS, *DomTree, *MBFI}, *MF, 2882 *VRM, *VRAI, Matrix)); 2883 2884 VRAI->calculateSpillWeightsAndHints(); 2885 2886 LLVM_DEBUG(LIS->dump()); 2887 2888 SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); 2889 SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); 2890 2891 IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); 2892 GlobalCand.resize(32); // This will grow as needed. 2893 SetOfBrokenHints.clear(); 2894 2895 allocatePhysRegs(); 2896 tryHintsRecoloring(); 2897 2898 if (VerifyEnabled) 2899 MF->verify(LIS, Indexes, "Before post optimization", &errs()); 2900 postOptimization(); 2901 reportStats(); 2902 2903 releaseMemory(); 2904 return true; 2905 } 2906