xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the RAGreedy function pass for register allocation in
10 // optimized builds.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RegAllocGreedy.h"
15 #include "AllocationOrder.h"
16 #include "InterferenceCache.h"
17 #include "RegAllocBase.h"
18 #include "SplitKit.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/BitVector.h"
21 #include "llvm/ADT/IndexedMap.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
27 #include "llvm/CodeGen/CalcSpillWeights.h"
28 #include "llvm/CodeGen/EdgeBundles.h"
29 #include "llvm/CodeGen/LiveDebugVariables.h"
30 #include "llvm/CodeGen/LiveInterval.h"
31 #include "llvm/CodeGen/LiveIntervalUnion.h"
32 #include "llvm/CodeGen/LiveIntervals.h"
33 #include "llvm/CodeGen/LiveRangeEdit.h"
34 #include "llvm/CodeGen/LiveRegMatrix.h"
35 #include "llvm/CodeGen/LiveStacks.h"
36 #include "llvm/CodeGen/MachineBasicBlock.h"
37 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
38 #include "llvm/CodeGen/MachineDominators.h"
39 #include "llvm/CodeGen/MachineFrameInfo.h"
40 #include "llvm/CodeGen/MachineFunction.h"
41 #include "llvm/CodeGen/MachineFunctionPass.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineLoopInfo.h"
44 #include "llvm/CodeGen/MachineOperand.h"
45 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
46 #include "llvm/CodeGen/MachinePassManager.h"
47 #include "llvm/CodeGen/MachineRegisterInfo.h"
48 #include "llvm/CodeGen/RegAllocEvictionAdvisor.h"
49 #include "llvm/CodeGen/RegAllocGreedyPass.h"
50 #include "llvm/CodeGen/RegAllocPriorityAdvisor.h"
51 #include "llvm/CodeGen/RegAllocRegistry.h"
52 #include "llvm/CodeGen/RegisterClassInfo.h"
53 #include "llvm/CodeGen/SlotIndexes.h"
54 #include "llvm/CodeGen/SpillPlacement.h"
55 #include "llvm/CodeGen/Spiller.h"
56 #include "llvm/CodeGen/TargetInstrInfo.h"
57 #include "llvm/CodeGen/TargetRegisterInfo.h"
58 #include "llvm/CodeGen/TargetSubtargetInfo.h"
59 #include "llvm/CodeGen/VirtRegMap.h"
60 #include "llvm/IR/Analysis.h"
61 #include "llvm/IR/DebugInfoMetadata.h"
62 #include "llvm/IR/Function.h"
63 #include "llvm/IR/LLVMContext.h"
64 #include "llvm/Pass.h"
65 #include "llvm/Support/BlockFrequency.h"
66 #include "llvm/Support/BranchProbability.h"
67 #include "llvm/Support/CommandLine.h"
68 #include "llvm/Support/Debug.h"
69 #include "llvm/Support/MathExtras.h"
70 #include "llvm/Support/Timer.h"
71 #include "llvm/Support/raw_ostream.h"
72 #include <algorithm>
73 #include <cassert>
74 #include <cstdint>
75 #include <utility>
76 
using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits,  "Number of split local live ranges");
STATISTIC(NumEvicted,      "Number of interferences evicted");

// Command-line knobs for tuning the greedy allocator. All of these are
// developer options.

static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
    "split-spill-mode", cl::Hidden,
    cl::desc("Spill mode for splitting live ranges"),
    cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
               clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
               clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
    cl::init(SplitEditor::SM_Speed));

static cl::opt<unsigned>
LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
                             cl::desc("Last chance recoloring max depth"),
                             cl::init(5));

static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
    "lcr-max-interf", cl::Hidden,
    cl::desc("Last chance recoloring maximum number of considered"
             " interference at a time"),
    cl::init(8));

// NOTE(review): both cl::NotHidden and cl::Hidden are passed below; cl::opt
// modifiers are applied in declaration order, so the trailing cl::Hidden is
// what takes effect. Presumably one of the two is vestigial -- confirm
// before cleaning up.
static cl::opt<bool> ExhaustiveSearch(
    "exhaustive-register-search", cl::NotHidden,
    cl::desc("Exhaustive Search for registers bypassing the depth "
             "and interference cutoffs of last chance recoloring"),
    cl::Hidden);

// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
              cl::desc("Cost for first time use of callee-saved register."),
              cl::init(0), cl::Hidden);

static cl::opt<unsigned long> GrowRegionComplexityBudget(
    "grow-region-complexity-budget",
    cl::desc("growRegion() does not scale with the number of BB edges, so "
             "limit its budget and bail out once we reach the limit."),
    cl::init(10000), cl::Hidden);

static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
    "greedy-regclass-priority-trumps-globalness",
    cl::desc("Change the greedy register allocator's live range priority "
             "calculation to make the AllocationPriority of the register class "
             "more important then whether the range is global"),
    cl::Hidden);

static cl::opt<bool> GreedyReverseLocalAssignment(
    "greedy-reverse-local-assignment",
    cl::desc("Reverse allocation order of local live ranges, such that "
             "shorter local live ranges will tend to be allocated first"),
    cl::Hidden);

static cl::opt<unsigned> SplitThresholdForRegWithHint(
    "split-threshold-for-reg-with-hint",
    cl::desc("The threshold for splitting a virtual register with a hint, in "
             "percentage"),
    cl::init(75), cl::Hidden);

// Makes this allocator selectable via -regalloc=greedy.
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);
143 
namespace {
/// Legacy pass-manager wrapper around the greedy register allocator. The
/// allocation logic itself lives in RAGreedy; this pass only collects the
/// required analyses and invokes RAGreedy::run().
class RAGreedyLegacy : public MachineFunctionPass {
  // Optional predicate restricting which virtual registers this pass
  // instance allocates (nullptr means allocate everything).
  RegAllocFilterFunc F;

public:
  RAGreedyLegacy(const RegAllocFilterFunc F = nullptr);

  static char ID;
  /// Return the pass name.
  StringRef getPassName() const override { return "Greedy Register Allocator"; }

  /// RAGreedy analysis usage.
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  /// Perform register allocation.
  bool runOnMachineFunction(MachineFunction &mf) override;

  // Register allocation runs after PHI elimination.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setNoPHIs();
  }

  // Assigning physical registers leaves the function out of SSA form.
  MachineFunctionProperties getClearedProperties() const override {
    return MachineFunctionProperties().setIsSSA();
  }
};

} // end anonymous namespace
170 
171 RAGreedyLegacy::RAGreedyLegacy(const RegAllocFilterFunc F)
172     : MachineFunctionPass(ID), F(std::move(F)) {
173   initializeRAGreedyLegacyPass(*PassRegistry::getPassRegistry());
174 }
175 
176 struct RAGreedy::RequiredAnalyses {
177   VirtRegMap *VRM = nullptr;
178   LiveIntervals *LIS = nullptr;
179   LiveRegMatrix *LRM = nullptr;
180   SlotIndexes *Indexes = nullptr;
181   MachineBlockFrequencyInfo *MBFI = nullptr;
182   MachineDominatorTree *DomTree = nullptr;
183   MachineLoopInfo *Loops = nullptr;
184   MachineOptimizationRemarkEmitter *ORE = nullptr;
185   EdgeBundles *Bundles = nullptr;
186   SpillPlacement *SpillPlacer = nullptr;
187   LiveDebugVariables *DebugVars = nullptr;
188 
189   // Used by InlineSpiller
190   LiveStacks *LSS;
191   // Proxies for eviction and priority advisors
192   RegAllocEvictionAdvisorProvider *EvictProvider;
193   RegAllocPriorityAdvisorProvider *PriorityProvider;
194 
195   RequiredAnalyses() = delete;
196   RequiredAnalyses(Pass &P);
197   RequiredAnalyses(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM);
198 };
199 
200 RAGreedy::RAGreedy(RequiredAnalyses &Analyses, const RegAllocFilterFunc F)
201     : RegAllocBase(F) {
202   VRM = Analyses.VRM;
203   LIS = Analyses.LIS;
204   Matrix = Analyses.LRM;
205   Indexes = Analyses.Indexes;
206   MBFI = Analyses.MBFI;
207   DomTree = Analyses.DomTree;
208   Loops = Analyses.Loops;
209   ORE = Analyses.ORE;
210   Bundles = Analyses.Bundles;
211   SpillPlacer = Analyses.SpillPlacer;
212   DebugVars = Analyses.DebugVars;
213   LSS = Analyses.LSS;
214   EvictProvider = Analyses.EvictProvider;
215   PriorityProvider = Analyses.PriorityProvider;
216 }
217 
218 void RAGreedyPass::printPipeline(
219     raw_ostream &OS,
220     function_ref<StringRef(StringRef)> MapClassName2PassName) const {
221   StringRef FilterName = Opts.FilterName.empty() ? "all" : Opts.FilterName;
222   OS << "greedy<" << FilterName << '>';
223 }
224 
/// Populate the analysis bundle from the new-PM analysis manager.
RAGreedy::RequiredAnalyses::RequiredAnalyses(
    MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) {
  LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
  LRM = &MFAM.getResult<LiveRegMatrixAnalysis>(MF);
  LSS = &MFAM.getResult<LiveStacksAnalysis>(MF);
  Indexes = &MFAM.getResult<SlotIndexesAnalysis>(MF);
  MBFI = &MFAM.getResult<MachineBlockFrequencyAnalysis>(MF);
  DomTree = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
  ORE = &MFAM.getResult<MachineOptimizationRemarkEmitterAnalysis>(MF);
  Loops = &MFAM.getResult<MachineLoopAnalysis>(MF);
  Bundles = &MFAM.getResult<EdgeBundlesAnalysis>(MF);
  SpillPlacer = &MFAM.getResult<SpillPlacementAnalysis>(MF);
  DebugVars = &MFAM.getResult<LiveDebugVariablesAnalysis>(MF);
  // The advisor analyses hand out provider objects (not advisors); concrete
  // per-function advisors are created from these later.
  EvictProvider = MFAM.getResult<RegAllocEvictionAdvisorAnalysis>(MF).Provider;
  PriorityProvider =
      MFAM.getResult<RegAllocPriorityAdvisorAnalysis>(MF).Provider;
  VRM = &MFAM.getResult<VirtRegMapAnalysis>(MF);
}
243 
/// New-PM entry point: run greedy allocation on \p MF and report which
/// analyses survive.
PreservedAnalyses RAGreedyPass::run(MachineFunction &MF,
                                    MachineFunctionAnalysisManager &MFAM) {
  // Checks required MachineFunction properties on entry, updates on exit.
  MFPropsModifier _(*this, MF);

  RAGreedy::RequiredAnalyses Analyses(MF, MFAM);
  RAGreedy Impl(Analyses, Opts.Filter);

  bool Changed = Impl.run(MF);
  if (!Changed)
    return PreservedAnalyses::all();
  // The allocator rewrites register operands but never touches the CFG, and
  // it maintains the liveness/slot-index/spill bookkeeping it consumed, so
  // those analyses are explicitly preserved.
  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MachineBlockFrequencyAnalysis>();
  PA.preserve<LiveIntervalsAnalysis>();
  PA.preserve<SlotIndexesAnalysis>();
  PA.preserve<LiveDebugVariablesAnalysis>();
  PA.preserve<LiveStacksAnalysis>();
  PA.preserve<VirtRegMapAnalysis>();
  PA.preserve<LiveRegMatrixAnalysis>();
  return PA;
}
265 
/// Populate the analysis bundle from the legacy pass manager via \p P.
RAGreedy::RequiredAnalyses::RequiredAnalyses(Pass &P) {
  VRM = &P.getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
  LIS = &P.getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  LSS = &P.getAnalysis<LiveStacksWrapperLegacy>().getLS();
  LRM = &P.getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
  Indexes = &P.getAnalysis<SlotIndexesWrapperPass>().getSI();
  MBFI = &P.getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
  DomTree = &P.getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  ORE = &P.getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
  Loops = &P.getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  Bundles = &P.getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
  SpillPlacer = &P.getAnalysis<SpillPlacementWrapperLegacy>().getResult();
  DebugVars = &P.getAnalysis<LiveDebugVariablesWrapperLegacy>().getLDV();
  // Providers, not advisors; per-function advisors are created later.
  EvictProvider =
      &P.getAnalysis<RegAllocEvictionAdvisorAnalysisLegacy>().getProvider();
  PriorityProvider =
      &P.getAnalysis<RegAllocPriorityAdvisorAnalysisLegacy>().getProvider();
}
284 
285 bool RAGreedyLegacy::runOnMachineFunction(MachineFunction &MF) {
286   RAGreedy::RequiredAnalyses Analyses(*this);
287   RAGreedy Impl(Analyses, F);
288   return Impl.run(MF);
289 }
290 
char RAGreedyLegacy::ID = 0;
char &llvm::RAGreedyLegacyID = RAGreedyLegacy::ID;

// Register the pass and declare the analyses it transitively depends on so
// the legacy pass manager schedules them before this pass runs.
INITIALIZE_PASS_BEGIN(RAGreedyLegacy, "greedy", "Greedy Register Allocator",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariablesWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescerLegacy)
INITIALIZE_PASS_DEPENDENCY(MachineSchedulerLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(EdgeBundlesWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(SpillPlacementWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysisLegacy)
INITIALIZE_PASS_DEPENDENCY(RegAllocPriorityAdvisorAnalysisLegacy)
INITIALIZE_PASS_END(RAGreedyLegacy, "greedy", "Greedy Register Allocator",
                    false, false)
313 
#ifndef NDEBUG
// Human-readable names for the live-range stages, used by debug dumps.
// NOTE(review): presumably this must stay in sync with the LiveRangeStage
// enum order declared elsewhere -- verify when adding stages.
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Done"
};
#endif

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
328 
/// Factory used by the register-allocator registry: an unfiltered greedy
/// allocator that handles every virtual register.
FunctionPass* llvm::createGreedyRegisterAllocator() {
  return new RAGreedyLegacy();
}
332 
333 FunctionPass *llvm::createGreedyRegisterAllocator(RegAllocFilterFunc Ftor) {
334   return new RAGreedyLegacy(Ftor);
335 }
336 
/// Declare every analysis the allocator consumes; analyses whose results it
/// keeps up to date are also marked preserved so the legacy pass manager
/// does not recompute them afterwards.
void RAGreedyLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
  AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
  AU.addRequired<LiveIntervalsWrapperPass>();
  AU.addPreserved<LiveIntervalsWrapperPass>();
  AU.addRequired<SlotIndexesWrapperPass>();
  AU.addPreserved<SlotIndexesWrapperPass>();
  AU.addRequired<LiveDebugVariablesWrapperLegacy>();
  AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
  AU.addRequired<LiveStacksWrapperLegacy>();
  AU.addPreserved<LiveStacksWrapperLegacy>();
  AU.addRequired<MachineDominatorTreeWrapperPass>();
  AU.addPreserved<MachineDominatorTreeWrapperPass>();
  AU.addRequired<MachineLoopInfoWrapperPass>();
  AU.addPreserved<MachineLoopInfoWrapperPass>();
  AU.addRequired<VirtRegMapWrapperLegacy>();
  AU.addPreserved<VirtRegMapWrapperLegacy>();
  AU.addRequired<LiveRegMatrixWrapperLegacy>();
  AU.addPreserved<LiveRegMatrixWrapperLegacy>();
  // Required-only analyses below: consumed but not kept up to date.
  AU.addRequired<EdgeBundlesWrapperLegacy>();
  AU.addRequired<SpillPlacementWrapperLegacy>();
  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
  AU.addRequired<RegAllocEvictionAdvisorAnalysisLegacy>();
  AU.addRequired<RegAllocPriorityAdvisorAnalysisLegacy>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
364 
365 //===----------------------------------------------------------------------===//
366 //                     LiveRangeEdit delegate methods
367 //===----------------------------------------------------------------------===//
368 
/// LiveRangeEdit delegate: may \p VirtReg be erased outright? Returns true
/// only when the register currently holds a physical assignment, after
/// releasing that assignment and notifying interested state.
bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
  LiveInterval &LI = LIS->getInterval(VirtReg);
  if (VRM->hasPhys(VirtReg)) {
    // Release the physreg first, then notify before the interval goes away.
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  // Nonetheless, clear the live-range so that the debug
  // dump will show the right state for that VirtReg.
  LI.clear();
  return false;
}
383 
384 void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
385   if (!VRM->hasPhys(VirtReg))
386     return;
387 
388   // Register is assigned, put it back on the queue for reassignment.
389   LiveInterval &LI = LIS->getInterval(VirtReg);
390   Matrix->unassign(LI);
391   RegAllocBase::enqueue(&LI);
392 }
393 
/// LiveRangeEdit delegate: forward clone notifications to the per-vreg
/// bookkeeping in ExtraRegInfo.
void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
  ExtraInfo->LRE_DidCloneVirtReg(New, Old);
}
397 
void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
  // Cloning a register we haven't even heard about yet?  Just ignore it.
  if (!Info.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it
  // to be split into connected components. The new components are much
  // smaller than the original, so they should get a new chance at being
  // assigned: reset the parent to RS_Assign, then copy its info to the clone
  // so both re-enter the queue at the same stage.
  Info[Old].Stage = RS_Assign;
  Info.grow(New.id());
  Info[New] = Info[Old];
}
411 
/// Drop per-function state so no stale references survive between runs.
void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  GlobalCand.clear();
}
416 
// RegAllocBase hook: push \p LI onto this allocator's priority queue.
void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }
418 
/// Queue \p LI for allocation. The queue holds (priority, ~reg) pairs; the
/// priority itself comes from the configured PriorityAdvisor.
void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
  const Register Reg = LI->reg();
  assert(Reg.isVirtual() && "Can only enqueue virtual registers");

  // First time this register is seen: advance it from RS_New to RS_Assign.
  auto Stage = ExtraInfo->getOrInitStage(Reg);
  if (Stage == RS_New) {
    Stage = RS_Assign;
    ExtraInfo->setStage(Reg, Stage);
  }

  unsigned Ret = PriorityAdvisor->getPriority(*LI);

  // The virtual register number is a tie breaker for same-priority ranges.
  // Give lower vreg numbers higher priority to assign them first.
  CurQueue.push(std::make_pair(Ret, ~Reg.id()));
}
437 
/// Compute the queue priority for \p LI; higher values are dequeued first.
/// The result packs several signals into 32 bits -- see the bit-layout
/// comment below.
unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
  const unsigned Size = LI.getSize();
  const Register Reg = LI.reg();
  unsigned Prio;
  LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);

  if (Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases. "Giant" here means
    // spanning more than twice as many instructions as there are allocatable
    // registers in the class.
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = RC.GlobalPriority ||
                       (!ReverseLocalAssignment &&
                        (Size / SlotIndex::InstrDist) >
                            (2 * RegClassInfo.getNumAllocatableRegs(&RC)));
    unsigned GlobalBit = 0;

    if (Stage == RS_Assign && !ForceGlobal && !LI.empty() &&
        LIS->intervalIsInOneMBB(LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocalAssignment)
        Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short LRGs to be assigned first
        // to one of the cheap registers. This could be much faster for very
        // large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex());
      }
    } else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference.  Mark a bit to prioritize global above local ranges.
      Prio = Size;
      GlobalBit = 1;
    }

    // Priority bit layout:
    // 31 RS_Assign priority
    // 30 Preference priority
    // if (RegClassPriorityTrumpsGlobalness)
    //   29-25 AllocPriority
    //   24 GlobalBit
    // else
    //   29 Global bit
    //   28-24 AllocPriority
    // 0-23 Size/Instr distance

    // Clamp the size to fit with the priority masking scheme
    Prio = std::min(Prio, (unsigned)maxUIntN(24));
    assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow");

    if (RegClassPriorityTrumpsGlobalness)
      Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
    else
      Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;

    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }

  return Prio;
}
509 
510 unsigned DummyPriorityAdvisor::getPriority(const LiveInterval &LI) const {
511   // Prioritize by virtual register number, lowest first.
512   Register Reg = LI.reg();
513   return ~Reg.virtRegIndex();
514 }
515 
// RegAllocBase hook: pop the highest-priority live interval, if any.
const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
517 
518 const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
519   if (CurQueue.empty())
520     return nullptr;
521   LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
522   CurQueue.pop();
523   return LI;
524 }
525 
526 //===----------------------------------------------------------------------===//
527 //                            Direct Assignment
528 //===----------------------------------------------------------------------===//
529 
/// tryAssign - Try to assign VirtReg to an available register. Returns the
/// chosen register, or an invalid MCRegister when nothing could be assigned
/// (including the case where a split around a hint consumed the range).
MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
                               AllocationOrder &Order,
                               SmallVectorImpl<Register> &NewVRegs,
                               const SmallVirtRegSet &FixedRegisters) {
  // Scan the allocation order for the first interference-free register.
  // A hinted register that is free is taken immediately.
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    assert(*I);
    if (!Matrix->checkInterference(VirtReg, *I)) {
      if (I.isHint())
        return *I;
      else
        PhysReg = *I;
    }
  }
  if (!PhysReg.isValid())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
    if (Order.isHint(Hint)) {
      MCRegister PhysHint = Hint.asMCReg();
      LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');

      if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
                                                 FixedRegisters)) {
        evictInterference(VirtReg, PhysHint, NewVRegs);
        return PhysHint;
      }

      // We can also split the virtual register in cold blocks.
      if (trySplitAroundHintReg(PhysHint, VirtReg, NewVRegs, Order))
        return MCRegister();

      // Record the missed hint, we may be able to recover
      // at the end if the surrounding allocation changed.
      SetOfBrokenHints.insert(&VirtReg);
    }

  // Try to evict interference from a cheaper alternative.
  uint8_t Cost = RegCosts[PhysReg.id()];

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  // PhysReg has extra per-use cost; see whether eviction frees up a cheaper
  // register, otherwise settle for PhysReg.
  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
                    << (unsigned)Cost << '\n');
  MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
  return CheapReg ? CheapReg : PhysReg;
}
584 
585 //===----------------------------------------------------------------------===//
586 //                         Interference eviction
587 //===----------------------------------------------------------------------===//
588 
/// Return true if \p VirtReg, currently placed in \p FromReg, could be moved
/// to some other register in its allocation order without evicting anyone.
bool RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
                                          MCRegister FromReg) const {
  auto HasRegUnitInterference = [&](MCRegUnit Unit) {
    // Instantiate a "subquery", not to be confused with the Queries array.
    LiveIntervalUnion::Query SubQ(VirtReg, Matrix->getLiveUnions()[Unit]);
    return SubQ.checkInterference();
  };

  for (MCRegister Reg :
       AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix)) {
    if (Reg == FromReg)
      continue;
    // If no units have interference, reassignment is possible.
    if (none_of(TRI->regunits(Reg), HasRegUnitInterference)) {
      LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                        << printReg(FromReg, TRI) << " to "
                        << printReg(Reg, TRI) << '\n');
      return true;
    }
  }
  return false;
}
611 
/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(const LiveInterval &VirtReg,
                                 MCRegister PhysReg,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());

  LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
                    << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<const LiveInterval *, 8> Intfs;
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
    // We usually have the interfering VRegs cached so collectInterferingVRegs()
    // should be fast. We may need to recalculate when different physregs
    // overlap the same register unit, so we had different SubRanges queried
    // against it.
    ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (const LiveInterval *Intf : Intfs) {
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg()))
      continue;

    Matrix->unassign(*Intf);
    assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraInfo->setCascade(Intf->reg(), Cascade);
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg());
  }
}
653 
654 /// Returns true if the given \p PhysReg is a callee saved register and has not
655 /// been used for allocation yet.
656 bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
657   MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
658   if (!CSR)
659     return false;
660 
661   return !Matrix->isPhysRegUsed(PhysReg);
662 }
663 
/// Compute how many entries of \p Order are worth examining given
/// \p CostPerUseLimit. Returns std::nullopt when no register in the class
/// can possibly be cheap enough.
std::optional<unsigned>
RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
                                       const AllocationOrder &Order,
                                       unsigned CostPerUseLimit) const {
  unsigned OrderLimit = Order.getOrder().size();

  if (CostPerUseLimit < uint8_t(~0u)) {
    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
    uint8_t MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
                        << MinCost << ", no cheaper registers to be found.\n");
      return std::nullopt;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
                        << " regs.\n");
    }
  }
  return OrderLimit;
}
690 
691 bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
692                                                  MCRegister PhysReg) const {
693   if (RegCosts[PhysReg.id()] >= CostPerUseLimit)
694     return false;
695   // The first use of a callee-saved register in a function has cost 1.
696   // Don't start using a CSR when the CostPerUseLimit is low.
697   if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
698     LLVM_DEBUG(
699         dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
700                << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
701                << '\n');
702     return false;
703   }
704   return true;
705 }
706 
/// tryEvict - Try to evict all interferences for a physreg.
/// @param  VirtReg Currently unassigned virtual register.
/// @param  Order   Physregs to try.
/// @param  NewVRegs Receives the vregs unassigned by the eviction.
/// @param  CostPerUseLimit Only registers cheaper than this are considered.
/// @return         Physreg to assign VirtReg, or 0.
MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
                              AllocationOrder &Order,
                              SmallVectorImpl<Register> &NewVRegs,
                              uint8_t CostPerUseLimit,
                              const SmallVirtRegSet &FixedRegisters) {
  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
                     TimePassesIsEnabled);

  // The advisor picks the victim register; we perform the actual eviction.
  MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
      VirtReg, Order, CostPerUseLimit, FixedRegisters);
  if (BestPhys.isValid())
    evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}
725 
726 //===----------------------------------------------------------------------===//
727 //                              Region Splitting
728 //===----------------------------------------------------------------------===//
729 
730 /// addSplitConstraints - Fill out the SplitConstraints vector based on the
731 /// interference pattern in Physreg and its aliases. Add the constraints to
732 /// SpillPlacement and return the static cost of this split in Cost, assuming
733 /// that all preferences in SplitConstraints are met.
734 /// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = BlockFrequency(0);
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    // Default: prefer a register across any block boundary the value crosses.
    // A live-out value whose last instruction is an IMPLICIT_DEF produces no
    // real value, so don't express a register preference for its exit.
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = (BI.LiveOut &&
               !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
                  ? SpillPlacement::PrefReg
                  : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        // Interference is live at the block entry: the value must enter on
        // the stack.
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        // Interference starts before the first use: prefer entering on the
        // stack and reloading before the first use.
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        // Interference between uses: a spill/reload is needed regardless of
        // the entry preference.
        ++Ins;
      }

      // Abort if the spill cannot be inserted at the MBB's start
      if (((BC.Entry == SpillPlacement::MustSpill) ||
           (BC.Entry == SpillPlacement::PrefSpill)) &&
          SlotIndex::isEarlierInstr(BI.FirstInstr,
                                    SA->getFirstSplitPoint(BC.Number)))
        return false;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        // Interference extends past the last split point: the value must
        // leave the block on the stack.
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        // Interference after the last use: prefer leaving on the stack.
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        // Interference between uses: spill code is needed either way.
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}
805 
806 /// addThroughConstraints - Add constraints and links to SpillPlacer from the
807 /// live-through blocks in Blocks.
bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  // Constraints and links are flushed to SpillPlacer in fixed-size batches so
  // the temporary arrays stay small and on the stack.
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

  for (unsigned Number : Blocks) {
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      // Transparent block: no interference, so it only contributes a link
      // between its ingoing and outgoing bundles.
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(ArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Abort if the spill cannot be inserted at the MBB's start
    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
    auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
    if (FirstNonDebugInstr != MBB->end() &&
        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
                                  SA->getFirstSplitPoint(Number)))
      return false;
    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(ArrayRef(BCS, B));
      B = 0;
    }
  }

  // Flush the final, possibly partial, batches.
  SpillPlacer->addConstraints(ArrayRef(BCS, B));
  SpillPlacer->addLinks(ArrayRef(TBS, T));
  return true;
}
860 
/// growRegion - Iteratively expand the split region for Cand by pulling in
/// through blocks adjacent to bundles that the spill placer marked as
/// preferring a register, until a fixed point is reached or the complexity
/// budget is exhausted. Returns false when the region cannot be completed
/// (budget exceeded or a through constraint could not be added).
bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  // Cap the total number of blocks examined to bound compile time on
  // pathological CFGs.
  unsigned long Budget = GrowRegionComplexityBudget;
  while (true) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (unsigned Bundle : NewBundles) {
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      // Limit compilation time by bailing out after we use all our budget.
      if (Blocks.size() >= Budget)
        return false;
      Budget -= Blocks.size();
      for (unsigned Block : Blocks) {
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg) {
      if (!addThroughConstraints(Cand.Intf, NewBlocks))
        return false;
    } else {
      // Providing that the variable being spilled does not look like a loop
      // induction variable, which is expensive to spill around and better
      // pushed into a condition inside the loop if possible, provide a strong
      // negative bias on through blocks to prevent unwanted liveness on loop
      // backedges.
      bool PrefSpill = true;
      if (SA->looksLikeLoopIV() && NewBlocks.size() >= 2) {
        // Check that the current bundle is adding a Header + start+end of
        // loop-internal blocks. If the block is indeed a header, don't make
        // the NewBlocks as PrefSpill to allow the variable to be live in
        // Header<->Latch.
        MachineLoop *L = Loops->getLoopFor(MF->getBlockNumbered(NewBlocks[0]));
        if (L && L->getHeader()->getNumber() == (int)NewBlocks[0] &&
            all_of(NewBlocks.drop_front(), [&](unsigned Block) {
              return L == Loops->getLoopFor(MF->getBlockNumbered(Block));
            }))
          PrefSpill = false;
      }
      if (PrefSpill)
        SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    }
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  LLVM_DEBUG(dbgs() << ", v=" << Visited);
  return true;
}
932 
933 /// calcCompactRegion - Compute the set of edge bundles that should be live
934 /// when splitting the current live range into compact regions.  Compact
935 /// regions can be computed without looking at interference.  They are the
936 /// regions formed by removing all the live-through blocks from the live range.
937 ///
938 /// Returns false if the current live range is already compact, or if the
939 /// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, MCRegister::NoRegister);

  LLVM_DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    // No bundle with a positive bias, so no compact region to form.
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return false;
  }

  SpillPlacer->finish();

  // No live bundles were selected, so there is no compact region to split
  // around.
  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  LLVM_DEBUG({
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  return true;
}
980 
981 /// calcSpillCost - Compute how expensive it would be to split the live range in
982 /// SA around all use blocks instead of forming bundle regions.
983 BlockFrequency RAGreedy::calcSpillCost() {
984   BlockFrequency Cost = BlockFrequency(0);
985   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
986   for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
987     unsigned Number = BI.MBB->getNumber();
988     // We normally only need one spill instruction - a load or a store.
989     Cost += SpillPlacer->getBlockFrequency(Number);
990 
991     // Unless the value is redefined in the block.
992     if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
993       Cost += SpillPlacer->getBlockFrequency(Number);
994   }
995   return Cost;
996 }
997 
998 /// calcGlobalSplitCost - Return the global split cost of following the split
999 /// pattern in LiveBundles. This cost should be added to the local cost of the
1000 /// interference pattern in SplitConstraints.
1001 ///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
                                             const AllocationOrder &Order) {
  BlockFrequency GlobalCost = BlockFrequency(0);
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
    // RegIn/RegOut: does the chosen placement keep the value in a register
    // across this block's ingoing/outgoing edge bundle?
    bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
    unsigned Ins = 0;

    Cand.Intf.moveToBlock(BC.Number);

    // A spill or reload is needed whenever the placement disagrees with the
    // per-block preference computed by addSplitConstraints.
    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  // Live-through blocks without uses only add cost when the value does not
  // stay put (in a register or on the stack) for the whole block.
  for (unsigned Number : Cand.ActiveBlocks) {
    bool RegIn  = LiveBundles[Bundles->getBundle(Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}
1043 
1044 /// splitAroundRegion - Split the current live range around the regions
1045 /// determined by BundleCand and GlobalCand.
1046 ///
1047 /// Before calling this function, GlobalCand and BundleCand must be initialized
1048 /// so each bundle is assigned to a valid candidate, or NoCand for the
1049 /// stack-bound bundles.  The shared SA/SE SplitAnalysis and SplitEditor
1050 /// objects must be initialized for the current live range, and intervals
1051 /// created for the used candidates.
1052 ///
1053 /// @param LREdit    The LiveRangeEdit object handling the current split.
1054 /// @param UsedCands List of used GlobalCand entries. Every BundleCand value
1055 ///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
                    << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  Register Reg = SA->getParent().reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    // IntvIn/IntvOut are the interval indexes assigned to this block's
    // ingoing/outgoing bundles (0 = stack), and IntfIn/IntfOut the matching
    // interference boundaries for that candidate in this block.
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned UsedCand : UsedCands) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
    for (unsigned Number : Blocks) {
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      // Both bundles stack-bound: nothing to split in this block.
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  // Keep the debug-info machinery in sync with the new virtual registers.
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));

    // Ignore old intervals from DCE.
    if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[I] == 0) {
      ExtraInfo->setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[I] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                          << " blocks as original.\n");
        // Don't allow repeated splitting as a safe guard against looping.
        ExtraInfo->setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(LIS, Indexes, "After splitting live range around region",
               &errs());
}
1194 
1195 MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
1196                                     AllocationOrder &Order,
1197                                     SmallVectorImpl<Register> &NewVRegs) {
1198   if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
1199     return MCRegister::NoRegister;
1200   unsigned NumCands = 0;
1201   BlockFrequency SpillCost = calcSpillCost();
1202   BlockFrequency BestCost;
1203 
1204   // Check if we can split this live range around a compact region.
1205   bool HasCompact = calcCompactRegion(GlobalCand.front());
1206   if (HasCompact) {
1207     // Yes, keep GlobalCand[0] as the compact region candidate.
1208     NumCands = 1;
1209     BestCost = BlockFrequency::max();
1210   } else {
1211     // No benefit from the compact region, our fallback will be per-block
1212     // splitting. Make sure we find a solution that is cheaper than spilling.
1213     BestCost = SpillCost;
1214     LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = "
1215                       << printBlockFreq(*MBFI, BestCost) << '\n');
1216   }
1217 
1218   unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
1219                                                NumCands, false /*IgnoreCSR*/);
1220 
1221   // No solutions found, fall back to single block splitting.
1222   if (!HasCompact && BestCand == NoCand)
1223     return MCRegister::NoRegister;
1224 
1225   return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
1226 }
1227 
/// Evaluate one region-split candidate built around \p PhysReg: compute its
/// constraints, grow the region, and compare its total cost against BestCost.
/// Updates BestCost/NumCands (and BestCand when this candidate wins) and
/// returns the index of the best candidate seen so far.
unsigned
RAGreedy::calculateRegionSplitCostAroundReg(MCPhysReg PhysReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            unsigned &BestCand) {
  // Discard bad candidates before we run out of interference cache cursors.
  // This will only affect register classes with a lot of registers (>32).
  if (NumCands == IntfCache.getMaxCursors()) {
    unsigned WorstCount = ~0u;
    unsigned Worst = 0;
    // Pick the candidate with the fewest live bundles as the least promising
    // one to recycle, never discarding the current best.
    for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
      if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
        continue;
      unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
      if (Count < WorstCount) {
        Worst = CandIndex;
        WorstCount = Count;
      }
    }
    // Overwrite the worst slot with the last candidate, fixing up BestCand if
    // it was the one that moved.
    --NumCands;
    GlobalCand[Worst] = GlobalCand[NumCands];
    if (BestCand == NumCands)
      BestCand = Worst;
  }

  if (GlobalCand.size() <= NumCands)
    GlobalCand.resize(NumCands+1);
  GlobalSplitCandidate &Cand = GlobalCand[NumCands];
  Cand.reset(IntfCache, PhysReg);

  SpillPlacer->prepare(Cand.LiveBundles);
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
    return BestCand;
  }
  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI)
                    << "\tstatic = " << printBlockFreq(*MBFI, Cost));
  // The static cost alone already rules this candidate out; skip the more
  // expensive region growth.
  if (Cost >= BestCost) {
    LLVM_DEBUG({
      if (BestCand == NoCand)
        dbgs() << " worse than no bundles\n";
      else
        dbgs() << " worse than "
               << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
    });
    return BestCand;
  }
  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return BestCand;
  }

  SpillPlacer->finish();

  // No live bundles, defer to splitSingleBlocks().
  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << " no bundles.\n");
    return BestCand;
  }

  // Add the dynamic (placement-dependent) cost on top of the static cost.
  Cost += calcGlobalSplitCost(Cand, Order);
  LLVM_DEBUG({
    dbgs() << ", total = " << printBlockFreq(*MBFI, Cost) << " with bundles";
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  if (Cost < BestCost) {
    BestCand = NumCands;
    BestCost = Cost;
  }
  ++NumCands;

  return BestCand;
}
1305 
1306 unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
1307                                             AllocationOrder &Order,
1308                                             BlockFrequency &BestCost,
1309                                             unsigned &NumCands,
1310                                             bool IgnoreCSR) {
1311   unsigned BestCand = NoCand;
1312   for (MCPhysReg PhysReg : Order) {
1313     assert(PhysReg);
1314     if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
1315       continue;
1316 
1317     calculateRegionSplitCostAroundReg(PhysReg, Order, BestCost, NumCands,
1318                                       BestCand);
1319   }
1320 
1321   return BestCand;
1322 }
1323 
1324 MCRegister RAGreedy::doRegionSplit(const LiveInterval &VirtReg,
1325                                    unsigned BestCand, bool HasCompact,
1326                                    SmallVectorImpl<Register> &NewVRegs) {
1327   SmallVector<unsigned, 8> UsedCands;
1328   // Prepare split editor.
1329   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1330   SE->reset(LREdit, SplitSpillMode);
1331 
1332   // Assign all edge bundles to the preferred candidate, or NoCand.
1333   BundleCand.assign(Bundles->getNumBundles(), NoCand);
1334 
1335   // Assign bundles for the best candidate region.
1336   if (BestCand != NoCand) {
1337     GlobalSplitCandidate &Cand = GlobalCand[BestCand];
1338     if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
1339       UsedCands.push_back(BestCand);
1340       Cand.IntvIdx = SE->openIntv();
1341       LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
1342                         << B << " bundles, intv " << Cand.IntvIdx << ".\n");
1343       (void)B;
1344     }
1345   }
1346 
1347   // Assign bundles for the compact region.
1348   if (HasCompact) {
1349     GlobalSplitCandidate &Cand = GlobalCand.front();
1350     assert(!Cand.PhysReg && "Compact region has no physreg");
1351     if (unsigned B = Cand.getBundles(BundleCand, 0)) {
1352       UsedCands.push_back(0);
1353       Cand.IntvIdx = SE->openIntv();
1354       LLVM_DEBUG(dbgs() << "Split for compact region in " << B
1355                         << " bundles, intv " << Cand.IntvIdx << ".\n");
1356       (void)B;
1357     }
1358   }
1359 
1360   splitAroundRegion(LREdit, UsedCands);
1361   return MCRegister();
1362 }
1363 
1364 // VirtReg has a physical Hint, this function tries to split VirtReg around
1365 // Hint if we can place new COPY instructions in cold blocks.
bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
                                     const LiveInterval &VirtReg,
                                     SmallVectorImpl<Register> &NewVRegs,
                                     AllocationOrder &Order) {
  // Split the VirtReg may generate COPY instructions in multiple cold basic
  // blocks, and increase code size. So we avoid it when the function is
  // optimized for size.
  if (MF->getFunction().hasOptSize())
    return false;

  // Don't allow repeated splitting as a safe guard against looping.
  if (ExtraInfo->getStage(VirtReg) >= RS_Split2)
    return false;

  BlockFrequency Cost = BlockFrequency(0);
  Register Reg = VirtReg.reg();

  // Compute the cost of assigning a non Hint physical register to VirtReg.
  // We define it as the total frequency of broken COPY instructions to/from
  // Hint register, and after split, they can be deleted.
  for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
    if (!TII->isFullCopyInstr(Instr))
      continue;
    // Start with the copy's source operand; when Reg is the source, examine
    // the destination instead.
    Register OtherReg = Instr.getOperand(1).getReg();
    if (OtherReg == Reg) {
      OtherReg = Instr.getOperand(0).getReg();
      // Skip identity copies of Reg onto itself.
      if (OtherReg == Reg)
        continue;
      // Check if VirtReg interferes with OtherReg after this COPY instruction.
      if (VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
        continue;
    }
    // Resolve OtherReg to a physical register, either directly or through its
    // current assignment, and count the copy when it connects to the hint.
    MCRegister OtherPhysReg =
        OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
    if (OtherPhysReg == Hint)
      Cost += MBFI->getBlockFreq(Instr.getParent());
  }

  // Decrease the cost so it will be split in colder blocks.
  BranchProbability Threshold(SplitThresholdForRegWithHint, 100);
  Cost *= Threshold;
  if (Cost == BlockFrequency(0))
    return false;

  unsigned NumCands = 0;
  unsigned BestCand = NoCand;
  SA->analyze(&VirtReg);
  calculateRegionSplitCostAroundReg(Hint, Order, Cost, NumCands, BestCand);
  if (BestCand == NoCand)
    return false;

  doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
  return true;
}
1420 
1421 //===----------------------------------------------------------------------===//
1422 //                            Per-Block Splitting
1423 //===----------------------------------------------------------------------===//
1424 
1425 /// tryBlockSplit - Split a global live range around every block with uses. This
1426 /// creates a lot of local live ranges, that will be split by tryLocalSplit if
1427 /// they don't allocate.
1428 MCRegister RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
1429                                    AllocationOrder &Order,
1430                                    SmallVectorImpl<Register> &NewVRegs) {
1431   assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
1432   Register Reg = VirtReg.reg();
1433   bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
1434   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1435   SE->reset(LREdit, SplitSpillMode);
1436   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1437   for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
1438     if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
1439       SE->splitSingleBlock(BI);
1440   }
1441   // No blocks were split.
1442   if (LREdit.empty())
1443     return MCRegister();
1444 
1445   // We did split for some blocks.
1446   SmallVector<unsigned, 8> IntvMap;
1447   SE->finish(&IntvMap);
1448 
1449   // Tell LiveDebugVariables about the new ranges.
1450   DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
1451 
1452   // Sort out the new intervals created by splitting. The remainder interval
1453   // goes straight to spilling, the new local ranges get to stay RS_New.
1454   for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
1455     const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
1456     if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
1457       ExtraInfo->setStage(LI, RS_Spill);
1458   }
1459 
1460   if (VerifyEnabled)
1461     MF->verify(LIS, Indexes, "After splitting live range around basic blocks",
1462                &errs());
1463   return MCRegister();
1464 }
1465 
1466 //===----------------------------------------------------------------------===//
1467 //                         Per-Instruction Splitting
1468 //===----------------------------------------------------------------------===//
1469 
1470 /// Get the number of allocatable registers that match the constraints of \p Reg
1471 /// on \p MI and that are also in \p SuperRC.
1472 static unsigned getNumAllocatableRegsForConstraints(
1473     const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
1474     const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
1475     const RegisterClassInfo &RCI) {
1476   assert(SuperRC && "Invalid register class");
1477 
1478   const TargetRegisterClass *ConstrainedRC =
1479       MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
1480                                              /* ExploreBundle */ true);
1481   if (!ConstrainedRC)
1482     return 0;
1483   return RCI.getNumAllocatableRegs(ConstrainedRC);
1484 }
1485 
1486 static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
1487                                        const TargetRegisterInfo &TRI,
1488                                        const MachineInstr &FirstMI,
1489                                        Register Reg) {
1490   LaneBitmask Mask;
1491   SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
1492   (void)AnalyzeVirtRegInBundle(const_cast<MachineInstr &>(FirstMI), Reg, &Ops);
1493 
1494   for (auto [MI, OpIdx] : Ops) {
1495     const MachineOperand &MO = MI->getOperand(OpIdx);
1496     assert(MO.isReg() && MO.getReg() == Reg);
1497     unsigned SubReg = MO.getSubReg();
1498     if (SubReg == 0 && MO.isUse()) {
1499       if (MO.isUndef())
1500         continue;
1501       return MRI.getMaxLaneMaskForVReg(Reg);
1502     }
1503 
1504     LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
1505     if (MO.isDef()) {
1506       if (!MO.isUndef())
1507         Mask |= ~SubRegMask;
1508     } else
1509       Mask |= SubRegMask;
1510   }
1511 
1512   return Mask;
1513 }
1514 
1515 /// Return true if \p MI at \P Use reads a subset of the lanes live in \p
1516 /// VirtReg.
1517 static bool readsLaneSubset(const MachineRegisterInfo &MRI,
1518                             const MachineInstr *MI, const LiveInterval &VirtReg,
1519                             const TargetRegisterInfo *TRI, SlotIndex Use,
1520                             const TargetInstrInfo *TII) {
1521   // Early check the common case. Beware of the semi-formed bundles SplitKit
1522   // creates by setting the bundle flag on copies without a matching BUNDLE.
1523 
1524   auto DestSrc = TII->isCopyInstr(*MI);
1525   if (DestSrc && !MI->isBundled() &&
1526       DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg())
1527     return false;
1528 
1529   // FIXME: We're only considering uses, but should be consider defs too?
1530   LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg());
1531 
1532   LaneBitmask LiveAtMask;
1533   for (const LiveInterval::SubRange &S : VirtReg.subranges()) {
1534     if (S.liveAt(Use))
1535       LiveAtMask |= S.LaneMask;
1536   }
1537 
1538   // If the live lanes aren't different from the lanes used by the instruction,
1539   // this doesn't help.
1540   return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
1541 }
1542 
/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing. However, when the live range is in a constrained register
/// class, it may help to insert copies such that parts of the live range can
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
MCRegister RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
                                         AllocationOrder &Order,
                                         SmallVectorImpl<Register> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
  // There is no point to this if there are no larger sub-classes.

  // Without a larger super-class to relax into, this can still pay off when
  // the register has subranges: individual instructions may read only a
  // subset of the live lanes.
  bool SplitSubClass = true;
  if (!RegClassInfo.isProperSubClass(CurRC)) {
    if (!VirtReg.hasSubRanges())
      return MCRegister();
    SplitSubClass = false;
  }

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitEditor::SM_Size);

  // With at most one use there is nothing to split around.
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return MCRegister();

  LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
                    << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs =
      RegClassInfo.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (const SlotIndex Use : Uses) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
      // Skip uses where splitting cannot relax anything: full copies, uses
      // whose constrained class is already as roomy as the super-class, and
      // (in subrange mode) instructions that read all live lanes anyway.
      if (TII->isFullCopyInstr(*MI) ||
          (SplitSubClass &&
           SuperRCNumAllocatableRegs ==
               getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
                                                   TII, TRI, RegClassInfo)) ||
          // TODO: Handle split for subranges with subclass constraints?
          (!SplitSubClass && VirtReg.hasSubRanges() &&
           !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) {
        LLVM_DEBUG(dbgs() << "    skip:\t" << Use << '\t' << *MI);
        continue;
      }
    }
    // Carve out a minimal interval covering just this use.
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Use);
    SlotIndex SegStop = SE->leaveIntvAfter(Use);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    LLVM_DEBUG(dbgs() << "All uses were copies.\n");
    return MCRegister();
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // Assign all new registers to RS_Spill. This was the last chance.
  ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return MCRegister();
}
1615 
1616 //===----------------------------------------------------------------------===//
1617 //                             Local Splitting
1618 //===----------------------------------------------------------------------===//
1619 
/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
///
void RAGreedy::calcGapWeights(MCRegister PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size()-1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
    BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
    BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), Unit)
             .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
        Matrix->getLiveUnions()[Unit].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      // Each overlapped gap records the largest interfering spill weight.
      const float weight = IntI.value()->weight();
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  // Fixed (reg-unit) interference can never be evicted, so overlapped gaps
  // get an infinite weight.
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    const LiveRange &LR = LIS->getRegUnit(Unit);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap+1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = huge_valf;
        if (Uses[Gap+1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}
1699 
/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
MCRegister RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
                                   AllocationOrder &Order,
                                   SmallVectorImpl<Register> &NewVRegs) {
  // TODO: the function currently only handles a single UseBlock; it should be
  // possible to generalize.
  if (SA->getUseBlocks().size() != 1)
    return MCRegister();

  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - A phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.

  // With two or fewer uses there is no interior gap to split around.
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 2)
    return MCRegister();
  const unsigned NumGaps = Uses.size()-1;

  LLVM_DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (const auto &Use : Uses)
      dbgs() << ' ' << Use;
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
    // Constrain to VirtReg's live range.
    unsigned RI =
        llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
    unsigned RE = RMS.size();
    for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
      // Look for Uses[I] <= RMS <= Uses[I + 1].
      assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
      if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
        break;
      LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
                        << Uses[I + 1]);
      RegMaskGaps.push_back(I);
      // Advance ri to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
        ++RI;
    }
    LLVM_DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have less instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  // Block frequency normalized to the function entry frequency.
  const float blockFreq =
      SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
      (1.0f / MBFI->getEntryFreq().getFrequency());
  SmallVector<float, 8> GapWeight;

  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned Gap : RegMaskGaps)
        GapWeight[Gap] = huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    // Sliding-window search: evaluate the current [SplitBefore, SplitAfter]
    // candidate, then either shrink it from the left or extend it to the
    // right until the window covers everything.
    while (true) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
                        << '-' << Uses[SplitAfter] << " I=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        LLVM_DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(
            blockFreq * (NewGaps + 1),
            Uses[SplitBefore].distance(Uses[SplitAfter]) +
                (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
            1);
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        LLVM_DEBUG(dbgs() << " w=" << EstWeight);
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            LLVM_DEBUG(dbgs() << " (best)");
            // Scale by Hysteresis so a later candidate must be clearly
            // better, not just marginally, to replace this one.
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          LLVM_DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
              MaxGap = std::max(MaxGap, GapWeight[I]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        LLVM_DEBUG(dbgs() << " end\n");
        break;
      }

      LLVM_DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return MCRegister();

  LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
                    << Uses[BestAfter] << ", " << BestDiff << ", "
                    << (BestAfter - BestBefore + 1) << " instrs\n");

  // Perform the winning split.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit);

  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop  = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // If the new range has the same number of instructions as before, mark it as
  // RS_Split2 so the next split will be forced to make progress. Otherwise,
  // leave the new intervals as RS_New so they can compete.
  bool LiveBefore = BestBefore != 0 || BI.LiveIn;
  bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
  unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
  if (NewGaps >= NumGaps) {
    LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
    assert(!ProgressRequired && "Didn't make progress when it was required.");
    for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
      if (IntvMap[I] == 1) {
        ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
        LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
      }
    LLVM_DEBUG(dbgs() << '\n');
  }
  ++NumLocalSplits;

  return MCRegister();
}
1926 
1927 //===----------------------------------------------------------------------===//
1928 //                          Live Range Splitting
1929 //===----------------------------------------------------------------------===//
1930 
1931 /// trySplit - Try to split VirtReg or one of its interferences, making it
1932 /// assignable.
1933 /// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
1934 MCRegister RAGreedy::trySplit(const LiveInterval &VirtReg,
1935                               AllocationOrder &Order,
1936                               SmallVectorImpl<Register> &NewVRegs,
1937                               const SmallVirtRegSet &FixedRegisters) {
1938   // Ranges must be Split2 or less.
1939   if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
1940     return MCRegister();
1941 
1942   // Local intervals are handled separately.
1943   if (LIS->intervalIsInOneMBB(VirtReg)) {
1944     NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
1945                        TimerGroupDescription, TimePassesIsEnabled);
1946     SA->analyze(&VirtReg);
1947     MCRegister PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
1948     if (PhysReg || !NewVRegs.empty())
1949       return PhysReg;
1950     return tryInstructionSplit(VirtReg, Order, NewVRegs);
1951   }
1952 
1953   NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
1954                      TimerGroupDescription, TimePassesIsEnabled);
1955 
1956   SA->analyze(&VirtReg);
1957 
1958   // First try to split around a region spanning multiple blocks. RS_Split2
1959   // ranges already made dubious progress with region splitting, so they go
1960   // straight to single block splitting.
1961   if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
1962     MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
1963     if (PhysReg || !NewVRegs.empty())
1964       return PhysReg;
1965   }
1966 
1967   // Then isolate blocks.
1968   return tryBlockSplit(VirtReg, Order, NewVRegs);
1969 }
1970 
1971 //===----------------------------------------------------------------------===//
1972 //                          Last Chance Recoloring
1973 //===----------------------------------------------------------------------===//
1974 
1975 /// Return true if \p reg has any tied def operand.
1976 static bool hasTiedDef(MachineRegisterInfo *MRI, Register reg) {
1977   for (const MachineOperand &MO : MRI->def_operands(reg))
1978     if (MO.isTied())
1979       return true;
1980 
1981   return false;
1982 }
1983 
1984 /// Return true if the existing assignment of \p Intf overlaps, but is not the
1985 /// same, as \p PhysReg.
1986 static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
1987                                          const VirtRegMap &VRM,
1988                                          MCRegister PhysReg,
1989                                          const LiveInterval &Intf) {
1990   MCRegister AssignedReg = VRM.getPhys(Intf.reg());
1991   if (PhysReg == AssignedReg)
1992     return false;
1993   return TRI.regsOverlap(PhysReg, AssignedReg);
1994 }
1995 
/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
/// When true is returned, \p RecoloringCandidates has been augmented with all
/// the live intervals that need to be recolored in order to free \p PhysReg
/// for \p VirtReg.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
bool RAGreedy::mayRecolorAllInterferences(
    MCRegister PhysReg, const LiveInterval &VirtReg,
    SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());

  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
    // If there is LastChanceRecoloringMaxInterference or more interferences,
    // chances are one would not be recolorable.
    if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
            LastChanceRecoloringMaxInterference &&
        !ExhaustiveSearch) {
      LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
      CutOffInfo |= CO_Interf;
      return false;
    }
    for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
      // If Intf is done and sits on the same register class as VirtReg, it
      // would not be recolorable as it is in the same state as
      // VirtReg. However there are at least two exceptions.
      //
      // If VirtReg has tied defs and Intf doesn't, then
      // there is still a point in examining if it can be recolorable.
      //
      // Additionally, if the register class has overlapping tuple members, it
      // may still be recolorable using a different tuple. This is more likely
      // if the existing assignment aliases with the candidate.
      //
      // Abort if Intf is hopeless (RS_Done, same class, no aliasing, and no
      // tied-def asymmetry in VirtReg's favor) or already fixed by an outer
      // recoloring attempt.
      if (((ExtraInfo->getStage(*Intf) == RS_Done &&
            MRI->getRegClass(Intf->reg()) == CurRC &&
            !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
           !(hasTiedDef(MRI, VirtReg.reg()) &&
             !hasTiedDef(MRI, Intf->reg()))) ||
          FixedRegisters.count(Intf->reg())) {
        LLVM_DEBUG(
            dbgs() << "Early abort: the interference is not recolorable.\n");
        return false;
      }
      // Intf looks recolorable; record it for the caller to enqueue.
      RecoloringCandidates.insert(Intf);
    }
  }
  return true;
}
2047 
/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
/// its interferences.
/// Last chance recoloring chooses a color for \p VirtReg and recolors every
/// virtual register that was using it. The recoloring process may recursively
/// use the last chance recoloring. Therefore, when a virtual register has been
/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
/// be last-chance-recolored again during this recoloring "session".
/// E.g.,
/// Let
/// vA can use {R1, R2    }
/// vB can use {    R2, R3}
/// vC can use {R1        }
/// Where vA, vB, and vC cannot be split anymore (they are reloads for
/// instance) and they all interfere.
///
/// vA is assigned R1
/// vB is assigned R2
/// vC tries to evict vA but vA is already done.
/// Regular register allocation fails.
///
/// Last chance recoloring kicks in:
/// vC does as if vA was evicted => vC uses R1.
/// vC is marked as fixed.
/// vA needs to find a color.
/// None are available.
/// vA cannot evict vC: vC is a fixed virtual register now.
/// vA does as if vB was evicted => vA uses R2.
/// vB needs to find a color.
/// R3 is available.
/// Recoloring => vC = R1, vA = R2, vB = R3
///
/// \p Order defines the preferred allocation order for \p VirtReg.
/// \p NewRegs will contain any new virtual register that have been created
/// (split, spill) during the process and that must be assigned.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
///
/// \p RecolorStack tracks the original assignments of successfully recolored
/// registers.
///
/// \p Depth gives the current depth of the last chance recoloring.
/// \return a physical register that can be used for VirtReg or ~0u if none
/// exists.
MCRegister RAGreedy::tryLastChanceRecoloring(
    const LiveInterval &VirtReg, AllocationOrder &Order,
    SmallVectorImpl<Register> &NewVRegs, SmallVirtRegSet &FixedRegisters,
    RecoloringStack &RecolorStack, unsigned Depth) {
  if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
    return ~0u;

  LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');

  // Remember the stack depth on entry so failed attempts can be rolled back.
  const ssize_t EntryStackSize = RecolorStack.size();

  // Ranges must be Done.
  assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
         "Last chance recoloring should really be last chance");
  // Set the max depth to LastChanceRecoloringMaxDepth.
  // We may want to reconsider that if we end up with a too large search space
  // for target with hundreds of registers.
  // Indeed, in that case we may want to cut the search space earlier.
  if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
    LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
    CutOffInfo |= CO_Depth;
    return ~0u;
  }

  // Set of Live intervals that will need to be recolored.
  SmallLISet RecoloringCandidates;

  // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
  // this recoloring "session".
  assert(!FixedRegisters.count(VirtReg.reg()));
  FixedRegisters.insert(VirtReg.reg());
  SmallVector<Register, 4> CurrentNewVRegs;

  // Try each physical register in the preferred order.
  for (MCRegister PhysReg : Order) {
    assert(PhysReg.isValid());
    LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
                      << printReg(PhysReg, TRI) << '\n');
    RecoloringCandidates.clear();
    CurrentNewVRegs.clear();

    // It is only possible to recolor virtual register interference.
    if (Matrix->checkInterference(VirtReg, PhysReg) >
        LiveRegMatrix::IK_VirtReg) {
      LLVM_DEBUG(
          dbgs() << "Some interferences are not with virtual registers.\n");

      continue;
    }

    // Early give up on this PhysReg if it is obvious we cannot recolor all
    // the interferences.
    if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
                                    FixedRegisters)) {
      LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
      continue;
    }

    // RecoloringCandidates contains all the virtual registers that interfere
    // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
    // recoloring and perform the actual recoloring.
    PQueue RecoloringQueue;
    for (const LiveInterval *RC : RecoloringCandidates) {
      Register ItVirtReg = RC->reg();
      enqueue(RecoloringQueue, RC);
      assert(VRM->hasPhys(ItVirtReg) &&
             "Interferences are supposed to be with allocated variables");

      // Record the current allocation.
      RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));

      // unset the related struct.
      Matrix->unassign(*RC);
    }

    // Do as if VirtReg was assigned to PhysReg so that the underlying
    // recoloring has the right information about the interferences and
    // available colors.
    Matrix->assign(VirtReg, PhysReg);

    // VirtReg may be deleted during tryRecoloringCandidates, save a copy.
    Register ThisVirtReg = VirtReg.reg();

    // Save the current recoloring state.
    // If we cannot recolor all the interferences, we will have to start again
    // at this point for the next physical register.
    SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
    if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
                                FixedRegisters, RecolorStack, Depth)) {
      // Push the queued vregs into the main queue.
      llvm::append_range(NewVRegs, CurrentNewVRegs);
      // Do not mess up with the global assignment process.
      // I.e., VirtReg must be unassigned.
      if (VRM->hasPhys(ThisVirtReg)) {
        Matrix->unassign(VirtReg);
        return PhysReg;
      }

      // It is possible VirtReg will be deleted during tryRecoloringCandidates.
      LLVM_DEBUG(dbgs() << "tryRecoloringCandidates deleted a fixed register "
                        << printReg(ThisVirtReg) << '\n');
      FixedRegisters.erase(ThisVirtReg);
      return MCRegister();
    }

    LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
                      << printReg(PhysReg, TRI) << '\n');

    // The recoloring attempt failed, undo the changes.
    FixedRegisters = SaveFixedRegisters;
    Matrix->unassign(VirtReg);

    // For a newly created vreg which is also in RecoloringCandidates,
    // don't add it to NewVRegs because its physical register will be restored
    // below. Other vregs in CurrentNewVRegs are created by calling
    // selectOrSplit and should be added into NewVRegs.
    for (Register R : CurrentNewVRegs) {
      if (RecoloringCandidates.count(&LIS->getInterval(R)))
        continue;
      NewVRegs.push_back(R);
    }

    // Roll back our unsuccessful recoloring. Also roll back any successful
    // recolorings in any recursive recoloring attempts, since it's possible
    // they would have introduced conflicts with assignments we will be
    // restoring further up the stack. Perform all unassignments prior to
    // reassigning, since sub-recolorings may have conflicted with the registers
    // we are going to restore to their original assignments.
    for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
      const LiveInterval *LI;
      MCRegister PhysReg;
      std::tie(LI, PhysReg) = RecolorStack[I];

      if (VRM->hasPhys(LI->reg()))
        Matrix->unassign(*LI);
    }

    // Second pass: restore the original assignments recorded on the stack.
    for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
      const LiveInterval *LI;
      MCRegister PhysReg;
      std::tie(LI, PhysReg) = RecolorStack[I];
      if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
        Matrix->assign(*LI, PhysReg);
    }

    // Pop the stack of recoloring attempts.
    RecolorStack.resize(EntryStackSize);
  }

  // Last chance recoloring did not work either, give up.
  return ~0u;
}
2242 
2243 /// tryRecoloringCandidates - Try to assign a new color to every register
2244 /// in \RecoloringQueue.
2245 /// \p NewRegs will contain any new virtual register created during the
2246 /// recoloring process.
2247 /// \p FixedRegisters[in/out] contains all the registers that have been
2248 /// recolored.
2249 /// \return true if all virtual registers in RecoloringQueue were successfully
2250 /// recolored, false otherwise.
2251 bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
2252                                        SmallVectorImpl<Register> &NewVRegs,
2253                                        SmallVirtRegSet &FixedRegisters,
2254                                        RecoloringStack &RecolorStack,
2255                                        unsigned Depth) {
2256   while (!RecoloringQueue.empty()) {
2257     const LiveInterval *LI = dequeue(RecoloringQueue);
2258     LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
2259     MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
2260                                            RecolorStack, Depth + 1);
2261     // When splitting happens, the live-range may actually be empty.
2262     // In that case, this is okay to continue the recoloring even
2263     // if we did not find an alternative color for it. Indeed,
2264     // there will not be anything to color for LI in the end.
2265     if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
2266       return false;
2267 
2268     if (!PhysReg) {
2269       assert(LI->empty() && "Only empty live-range do not require a register");
2270       LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2271                         << " succeeded. Empty LI.\n");
2272       continue;
2273     }
2274     LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2275                       << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
2276 
2277     Matrix->assign(*LI, PhysReg);
2278     FixedRegisters.insert(LI->reg());
2279   }
2280   return true;
2281 }
2282 
2283 //===----------------------------------------------------------------------===//
2284 //                            Main Entry Point
2285 //===----------------------------------------------------------------------===//
2286 
2287 MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
2288                                    SmallVectorImpl<Register> &NewVRegs) {
2289   CutOffInfo = CO_None;
2290   LLVMContext &Ctx = MF->getFunction().getContext();
2291   SmallVirtRegSet FixedRegisters;
2292   RecoloringStack RecolorStack;
2293   MCRegister Reg =
2294       selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
2295   if (Reg == ~0U && (CutOffInfo != CO_None)) {
2296     uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2297     if (CutOffEncountered == CO_Depth)
2298       Ctx.emitError("register allocation failed: maximum depth for recoloring "
2299                     "reached. Use -fexhaustive-register-search to skip "
2300                     "cutoffs");
2301     else if (CutOffEncountered == CO_Interf)
2302       Ctx.emitError("register allocation failed: maximum interference for "
2303                     "recoloring reached. Use -fexhaustive-register-search "
2304                     "to skip cutoffs");
2305     else if (CutOffEncountered == (CO_Depth | CO_Interf))
2306       Ctx.emitError("register allocation failed: maximum interference and "
2307                     "depth for recoloring reached. Use "
2308                     "-fexhaustive-register-search to skip cutoffs");
2309   }
2310   return Reg;
2311 }
2312 
2313 /// Using a CSR for the first time has a cost because it causes push|pop
2314 /// to be added to prologue|epilogue. Splitting a cold section of the live
2315 /// range can have lower cost than using the CSR for the first time;
2316 /// Spilling a live range in the cold path can have lower cost than using
2317 /// the CSR for the first time. Returns the physical register if we decide
2318 /// to use the CSR; otherwise return MCRegister().
2319 MCRegister RAGreedy::tryAssignCSRFirstTime(
2320     const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
2321     uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
2322   if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
2323     // We choose spill over using the CSR for the first time if the spill cost
2324     // is lower than CSRCost.
2325     SA->analyze(&VirtReg);
2326     if (calcSpillCost() >= CSRCost)
2327       return PhysReg;
2328 
2329     // We are going to spill, set CostPerUseLimit to 1 to make sure that
2330     // we will not use a callee-saved register in tryEvict.
2331     CostPerUseLimit = 1;
2332     return MCRegister();
2333   }
2334   if (ExtraInfo->getStage(VirtReg) < RS_Split) {
2335     // We choose pre-splitting over using the CSR for the first time if
2336     // the cost of splitting is lower than CSRCost.
2337     SA->analyze(&VirtReg);
2338     unsigned NumCands = 0;
2339     BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2340     unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2341                                                  NumCands, true /*IgnoreCSR*/);
2342     if (BestCand == NoCand)
2343       // Use the CSR if we can't find a region split below CSRCost.
2344       return PhysReg;
2345 
2346     // Perform the actual pre-splitting.
2347     doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
2348     return MCRegister();
2349   }
2350   return PhysReg;
2351 }
2352 
/// Callback fired just before \p LI is deleted: drop any pointer we keep to
/// it so SetOfBrokenHints never dereferences a dead interval.
void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
  // Do not keep invalid information around.
  SetOfBrokenHints.remove(&LI);
}
2357 
2358 void RAGreedy::initializeCSRCost() {
2359   // We use the command-line option if it is explicitly set, otherwise use the
2360   // larger one out of the command-line option and the value reported by TRI.
2361   CSRCost = BlockFrequency(
2362       CSRFirstTimeCost.getNumOccurrences()
2363           ? CSRFirstTimeCost
2364           : std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2365   if (!CSRCost.getFrequency())
2366     return;
2367 
2368   // Raw cost is relative to Entry == 2^14; scale it appropriately.
2369   uint64_t ActualEntry = MBFI->getEntryFreq().getFrequency();
2370   if (!ActualEntry) {
2371     CSRCost = BlockFrequency(0);
2372     return;
2373   }
2374   uint64_t FixedEntry = 1 << 14;
2375   if (ActualEntry < FixedEntry)
2376     CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2377   else if (ActualEntry <= UINT32_MAX)
2378     // Invert the fraction and divide.
2379     CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2380   else
2381     // Can't use BranchProbability in general, since it takes 32-bit numbers.
2382     CSRCost =
2383         BlockFrequency(CSRCost.getFrequency() * (ActualEntry / FixedEntry));
2384 }
2385 
2386 /// Collect the hint info for \p Reg.
2387 /// The results are stored into \p Out.
2388 /// \p Out is not cleared before being populated.
2389 void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
2390   for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2391     if (!TII->isFullCopyInstr(Instr))
2392       continue;
2393     // Look for the other end of the copy.
2394     Register OtherReg = Instr.getOperand(0).getReg();
2395     if (OtherReg == Reg) {
2396       OtherReg = Instr.getOperand(1).getReg();
2397       if (OtherReg == Reg)
2398         continue;
2399     }
2400     // Get the current assignment.
2401     MCRegister OtherPhysReg =
2402         OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
2403     // Push the collected information.
2404     Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2405                            OtherPhysReg));
2406   }
2407 }
2408 
2409 /// Using the given \p List, compute the cost of the broken hints if
2410 /// \p PhysReg was used.
2411 /// \return The cost of \p List for \p PhysReg.
2412 BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2413                                            MCRegister PhysReg) {
2414   BlockFrequency Cost = BlockFrequency(0);
2415   for (const HintInfo &Info : List) {
2416     if (Info.PhysReg != PhysReg)
2417       Cost += Info.Freq;
2418   }
2419   return Cost;
2420 }
2421 
/// Using the register assigned to \p VirtReg, try to recolor
/// all the live ranges that are copy-related with \p VirtReg.
/// The recoloring is then propagated to all the live-ranges that have
/// been recolored and so on, until no more copies can be coalesced or
/// it is not profitable.
/// For a given live range, profitability is determined by the sum of the
/// frequencies of the non-identity copies it would introduce with the old
/// and new register.
void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
  // We have a broken hint, check if it is possible to fix it by
  // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
  // some register and PhysReg may be available for the other live-ranges.
  SmallSet<Register, 4> Visited;
  SmallVector<Register, 2> RecoloringCandidates;
  HintsInfo Info;
  Register Reg = VirtReg.reg();
  MCRegister PhysReg = VRM->getPhys(Reg);
  // Start the recoloring algorithm from the input live-interval, then
  // it will propagate to the ones that are copy-related with it.
  Visited.insert(Reg);
  RecoloringCandidates.push_back(Reg);

  LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
                    << '(' << printReg(PhysReg, TRI) << ")\n");

  // Worklist-driven traversal of the copy-related component rooted at Reg.
  do {
    Reg = RecoloringCandidates.pop_back_val();

    // We cannot recolor physical register.
    if (Reg.isPhysical())
      continue;

    // This may be a skipped register.
    if (!VRM->hasPhys(Reg)) {
      assert(!shouldAllocateRegister(Reg) &&
             "We have an unallocated variable which should have been handled");
      continue;
    }

    // Get the live interval mapped with this virtual register to be able
    // to check for the interference with the new color.
    LiveInterval &LI = LIS->getInterval(Reg);
    MCRegister CurrPhys = VRM->getPhys(Reg);
    // Check that the new color matches the register class constraints and
    // that it is free for this live range.
    if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
                                Matrix->checkInterference(LI, PhysReg)))
      continue;

    LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
                      << ") is recolorable.\n");

    // Gather the hint info.
    Info.clear();
    collectHintInfo(Reg, Info);
    // Check if recoloring the live-range will increase the cost of the
    // non-identity copies.
    if (CurrPhys != PhysReg) {
      LLVM_DEBUG(dbgs() << "Checking profitability:\n");
      // Compare the broken-hint frequency under the current color against
      // the proposed one; recolor only if it does not get worse.
      BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
      BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
      LLVM_DEBUG(dbgs() << "Old Cost: " << printBlockFreq(*MBFI, OldCopiesCost)
                        << "\nNew Cost: "
                        << printBlockFreq(*MBFI, NewCopiesCost) << '\n');
      if (OldCopiesCost < NewCopiesCost) {
        LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
        continue;
      }
      // At this point, the cost is either cheaper or equal. If it is
      // equal, we consider this is profitable because it may expose
      // more recoloring opportunities.
      LLVM_DEBUG(dbgs() << "=> Profitable.\n");
      // Recolor the live-range.
      Matrix->unassign(LI);
      Matrix->assign(LI, PhysReg);
    }
    // Push all copy-related live-ranges to keep reconciling the broken
    // hints.
    for (const HintInfo &HI : Info) {
      if (Visited.insert(HI.Reg).second)
        RecoloringCandidates.push_back(HI.Reg);
    }
  } while (!RecoloringCandidates.empty());
}
2506 
2507 /// Try to recolor broken hints.
2508 /// Broken hints may be repaired by recoloring when an evicted variable
2509 /// freed up a register for a larger live-range.
2510 /// Consider the following example:
2511 /// BB1:
2512 ///   a =
2513 ///   b =
2514 /// BB2:
2515 ///   ...
2516 ///   = b
2517 ///   = a
2518 /// Let us assume b gets split:
2519 /// BB1:
2520 ///   a =
2521 ///   b =
2522 /// BB2:
2523 ///   c = b
2524 ///   ...
2525 ///   d = c
2526 ///   = d
2527 ///   = a
2528 /// Because of how the allocation work, b, c, and d may be assigned different
2529 /// colors. Now, if a gets evicted later:
2530 /// BB1:
2531 ///   a =
2532 ///   st a, SpillSlot
2533 ///   b =
2534 /// BB2:
2535 ///   c = b
2536 ///   ...
2537 ///   d = c
2538 ///   = d
2539 ///   e = ld SpillSlot
2540 ///   = e
2541 /// This is likely that we can assign the same register for b, c, and d,
2542 /// getting rid of 2 copies.
2543 void RAGreedy::tryHintsRecoloring() {
2544   for (const LiveInterval *LI : SetOfBrokenHints) {
2545     assert(LI->reg().isVirtual() &&
2546            "Recoloring is possible only for virtual registers");
2547     // Some dead defs may be around (e.g., because of debug uses).
2548     // Ignore those.
2549     if (!VRM->hasPhys(LI->reg()))
2550       continue;
2551     tryHintRecoloring(*LI);
2552   }
2553 }
2554 
/// Core allocation state machine for a single live range: try a free
/// register, then a CSR-cost check, eviction, splitting, last-chance
/// recoloring, and finally spilling. Returns the assigned physical register,
/// MCRegister() when the range was split/spilled/deferred (NewVRegs then
/// carries any replacement vregs), or ~0u on hard failure.
MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
                                       SmallVectorImpl<Register> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       RecoloringStack &RecolorStack,
                                       unsigned Depth) {
  // No cost cap by default; tryAssignCSRFirstTime may lower it to 1.
  uint8_t CostPerUseLimit = uint8_t(~0u);
  // First try assigning a free register.
  auto Order =
      AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
  if (MCRegister PhysReg =
          tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
    // When NewVRegs is not empty, we may have made decisions such as evicting
    // a virtual register, go with the earlier decisions and use the physical
    // register.
    if (CSRCost.getFrequency() &&
        EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
      MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
                                                CostPerUseLimit, NewVRegs);
      if (CSRReg || !NewVRegs.empty())
        // Return now if we decide to use a CSR or create new vregs due to
        // pre-splitting.
        return CSRReg;
    } else
      return PhysReg;
  }
  // Non empty NewVRegs means VirtReg has been split.
  if (!NewVRegs.empty())
    return MCRegister();

  LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
  LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
                    << ExtraInfo->getCascade(VirtReg.reg()) << '\n');

  // Try to evict a less worthy live range, but only for ranges from the primary
  // queue. The RS_Split ranges already failed to do this, and they should not
  // get a second chance until they have been split.
  if (Stage != RS_Split) {
    if (MCRegister PhysReg =
            tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
                     FixedRegisters)) {
      Register Hint = MRI->getSimpleHint(VirtReg.reg());
      // If VirtReg has a hint and that hint is broken record this
      // virtual register as a recoloring candidate for broken hint.
      // Indeed, since we evicted a variable in its neighborhood it is
      // likely we can at least partially recolor some of the
      // copy-related live-ranges.
      if (Hint && Hint != PhysReg)
        SetOfBrokenHints.insert(&VirtReg);
      return PhysReg;
    }
  }

  assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");

  // The first time we see a live range, don't try to split or spill.
  // Wait until the second time, when all smaller ranges have been allocated.
  // This gives a better picture of the interference to split around.
  if (Stage < RS_Split) {
    ExtraInfo->setStage(VirtReg, RS_Split);
    LLVM_DEBUG(dbgs() << "wait for second round\n");
    // Requeue VirtReg for the next round by reporting it as a "new" vreg.
    NewVRegs.push_back(VirtReg.reg());
    return MCRegister();
  }

  if (Stage < RS_Spill && !VirtReg.empty()) {
    // Try splitting VirtReg or interferences.
    unsigned NewVRegSizeBefore = NewVRegs.size();
    MCRegister PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
    if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
      return PhysReg;
  }

  // If we couldn't allocate a register from spilling, there is probably some
  // invalid inline assembly. The base class will report it.
  if (Stage >= RS_Done || !VirtReg.isSpillable()) {
    return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
                                   RecolorStack, Depth);
  }

  // Finally spill VirtReg itself.
  NamedRegionTimer T("spill", "Spiller", TimerGroupName,
                     TimerGroupDescription, TimePassesIsEnabled);
  LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  spiller().spill(LRE, &Order);
  // Spilled replacement vregs never re-enter the split/spill pipeline.
  ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);

  // Tell LiveDebugVariables about the new ranges. Ranges not being covered by
  // the new regs are kept in LDV (still mapping to the old register), until
  // we rewrite spilled locations in LDV at a later stage.
  for (Register r : spiller().getSpilledRegs())
    DebugVars->splitRegister(r, LRE.regs(), *LIS);
  for (Register r : spiller().getReplacedRegs())
    DebugVars->splitRegister(r, LRE.regs(), *LIS);

  if (VerifyEnabled)
    MF->verify(LIS, Indexes, "After spilling", &errs());

  // The live virtual register requesting allocation was spilled, so tell
  // the caller not to allocate anything during this round.
  return MCRegister();
}
2656 
2657 void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
2658   using namespace ore;
2659   if (Spills) {
2660     R << NV("NumSpills", Spills) << " spills ";
2661     R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
2662   }
2663   if (FoldedSpills) {
2664     R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
2665     R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
2666       << " total folded spills cost ";
2667   }
2668   if (Reloads) {
2669     R << NV("NumReloads", Reloads) << " reloads ";
2670     R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
2671   }
2672   if (FoldedReloads) {
2673     R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
2674     R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
2675       << " total folded reloads cost ";
2676   }
2677   if (ZeroCostFoldedReloads)
2678     R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
2679       << " zero cost folded reloads ";
2680   if (Copies) {
2681     R << NV("NumVRCopies", Copies) << " virtual registers copies ";
2682     R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
2683   }
2684 }
2685 
/// Count spills, reloads, folded variants, and virtual-register copies in
/// \p MBB, then weight each count by the block's frequency relative to the
/// function entry to produce the corresponding cost fields.
RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
  RAGreedyStats Stats;
  const MachineFrameInfo &MFI = MF->getFrameInfo();
  int FI;

  // True when the memory operand accesses a stack slot created for spills.
  auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
    return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
        A->getPseudoValue())->getFrameIndex());
  };
  auto isPatchpointInstr = [](const MachineInstr &MI) {
    return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
           MI.getOpcode() == TargetOpcode::STACKMAP ||
           MI.getOpcode() == TargetOpcode::STATEPOINT;
  };
  for (MachineInstr &MI : MBB) {
    auto DestSrc = TII->isCopyInstr(MI);
    if (DestSrc) {
      const MachineOperand &Dest = *DestSrc->Destination;
      const MachineOperand &Src = *DestSrc->Source;
      Register SrcReg = Src.getReg();
      Register DestReg = Dest.getReg();
      // Only count `COPY`s with a virtual register as source or destination.
      if (SrcReg.isVirtual() || DestReg.isVirtual()) {
        // Map both ends to their assigned physical registers (applying any
        // subregister index) so identity copies can be recognized below.
        if (SrcReg.isVirtual()) {
          SrcReg = VRM->getPhys(SrcReg);
          if (SrcReg && Src.getSubReg())
            SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
        }
        if (DestReg.isVirtual()) {
          DestReg = VRM->getPhys(DestReg);
          if (DestReg && Dest.getSubReg())
            DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
        }
        // Identity copies are free and therefore not counted.
        if (SrcReg != DestReg)
          ++Stats.Copies;
      }
      continue;
    }

    SmallVector<const MachineMemOperand *, 2> Accesses;
    if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
      ++Stats.Reloads;
      continue;
    }
    if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
      ++Stats.Spills;
      continue;
    }
    if (TII->hasLoadFromStackSlot(MI, Accesses) &&
        llvm::any_of(Accesses, isSpillSlotAccess)) {
      if (!isPatchpointInstr(MI)) {
        Stats.FoldedReloads += Accesses.size();
        continue;
      }
      // For statepoint there may be folded and zero cost folded stack reloads.
      std::pair<unsigned, unsigned> NonZeroCostRange =
          TII->getPatchpointUnfoldableRange(MI);
      SmallSet<unsigned, 16> FoldedReloads;
      SmallSet<unsigned, 16> ZeroCostFoldedReloads;
      for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
        MachineOperand &MO = MI.getOperand(Idx);
        if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
          continue;
        // Operands inside the unfoldable range carry a real reload cost;
        // the rest are zero cost (deduplicated per stack slot via the sets).
        if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
          FoldedReloads.insert(MO.getIndex());
        else
          ZeroCostFoldedReloads.insert(MO.getIndex());
      }
      // If stack slot is used in folded reload it is not zero cost then.
      for (unsigned Slot : FoldedReloads)
        ZeroCostFoldedReloads.erase(Slot);
      Stats.FoldedReloads += FoldedReloads.size();
      Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
      continue;
    }
    Accesses.clear();
    if (TII->hasStoreToStackSlot(MI, Accesses) &&
        llvm::any_of(Accesses, isSpillSlotAccess)) {
      Stats.FoldedSpills += Accesses.size();
    }
  }
  // Set cost of collected statistic by multiplication to relative frequency of
  // this basic block.
  float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
  Stats.ReloadsCost = RelFreq * Stats.Reloads;
  Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
  Stats.SpillsCost = RelFreq * Stats.Spills;
  Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
  Stats.CopiesCost = RelFreq * Stats.Copies;
  return Stats;
}
2777 
2778 RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
2779   RAGreedyStats Stats;
2780 
2781   // Sum up the spill and reloads in subloops.
2782   for (MachineLoop *SubLoop : *L)
2783     Stats.add(reportStats(SubLoop));
2784 
2785   for (MachineBasicBlock *MBB : L->getBlocks())
2786     // Handle blocks that were not included in subloops.
2787     if (Loops->getLoopFor(MBB) == L)
2788       Stats.add(computeStats(*MBB));
2789 
2790   if (!Stats.isEmpty()) {
2791     using namespace ore;
2792 
2793     ORE->emit([&]() {
2794       MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
2795                                         L->getStartLoc(), L->getHeader());
2796       Stats.report(R);
2797       R << "generated in loop";
2798       return R;
2799     });
2800   }
2801   return Stats;
2802 }
2803 
2804 void RAGreedy::reportStats() {
2805   if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
2806     return;
2807   RAGreedyStats Stats;
2808   for (MachineLoop *L : *Loops)
2809     Stats.add(reportStats(L));
2810   // Process non-loop blocks.
2811   for (MachineBasicBlock &MBB : *MF)
2812     if (!Loops->getLoopFor(&MBB))
2813       Stats.add(computeStats(MBB));
2814   if (!Stats.isEmpty()) {
2815     using namespace ore;
2816 
2817     ORE->emit([&]() {
2818       DebugLoc Loc;
2819       if (auto *SP = MF->getFunction().getSubprogram())
2820         Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
2821       MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
2822                                         &MF->front());
2823       Stats.report(R);
2824       R << "generated in function";
2825       return R;
2826     });
2827   }
2828 }
2829 
2830 bool RAGreedy::hasVirtRegAlloc() {
2831   for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
2832     Register Reg = Register::index2VirtReg(I);
2833     if (MRI->reg_nodbg_empty(Reg))
2834       continue;
2835     if (shouldAllocateRegister(Reg))
2836       return true;
2837   }
2838 
2839   return false;
2840 }
2841 
/// Pass entry point: set up all allocator state for \p mf, run the greedy
/// allocation loop, then post-process. Returns false (no change) when there
/// is nothing to allocate. NOTE: the initialization order below is
/// significant — helpers depend on state set up by earlier lines.
bool RAGreedy::run(MachineFunction &mf) {
  LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
                    << "********** Function: " << mf.getName() << '\n');

  MF = &mf;
  TII = MF->getSubtarget().getInstrInfo();

  if (VerifyEnabled)
    MF->verify(LIS, Indexes, "Before greedy register allocator", &errs());

  RegAllocBase::init(*this->VRM, *this->LIS, *this->Matrix);

  // Early return if there is no virtual register to be allocated to a
  // physical register.
  if (!hasVirtRegAlloc())
    return false;

  // Renumber to get accurate and consistent results from
  // SlotIndexes::getApproxInstrDistance.
  Indexes->packIndexes();

  initializeCSRCost();

  // Target-dependent tuning knobs; command-line options override the
  // target's defaults when explicitly given.
  RegCosts = TRI->getRegisterCosts(*MF);
  RegClassPriorityTrumpsGlobalness =
      GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
          ? GreedyRegClassPriorityTrumpsGlobalness
          : TRI->regClassPriorityTrumpsGlobalness(*MF);

  ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
                               ? GreedyReverseLocalAssignment
                               : TRI->reverseLocalAssignment();

  ExtraInfo.emplace();

  // Advisors for eviction and queue-priority decisions.
  EvictAdvisor = EvictProvider->getAdvisor(*MF, *this, MBFI, Loops);
  PriorityAdvisor = PriorityProvider->getAdvisor(*MF, *this, *Indexes);

  VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
  SpillerInstance.reset(createInlineSpiller({*LIS, *LSS, *DomTree, *MBFI}, *MF,
                                            *VRM, *VRAI, Matrix));

  VRAI->calculateSpillWeightsAndHints();

  LLVM_DEBUG(LIS->dump());

  SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
  SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));

  IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
  GlobalCand.resize(32);  // This will grow as needed.
  SetOfBrokenHints.clear();

  // Main allocation loop, followed by an attempt to reconcile any hints
  // broken along the way.
  allocatePhysRegs();
  tryHintsRecoloring();

  if (VerifyEnabled)
    MF->verify(LIS, Indexes, "Before post optimization", &errs());
  postOptimization();
  reportStats();

  releaseMemory();
  return true;
}
2906