xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the ML eviction advisor and reward injection pass
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AllocationOrder.h"
14 #include "RegAllocGreedy.h"
15 #include "llvm/Analysis/InteractiveModelRunner.h"
16 #include "llvm/Analysis/MLModelRunner.h"
17 #include "llvm/Analysis/TensorSpec.h"
18 #include "llvm/CodeGen/RegAllocEvictionAdvisor.h"
19 #if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TFLITE)
20 #include "llvm/Analysis/ModelUnderTrainingRunner.h"
21 #include "llvm/Analysis/NoInferenceModelRunner.h"
22 #include "llvm/Analysis/Utils/TrainingLogger.h"
23 #endif
24 #include "MLRegAllocEvictAdvisor.h"
25 #include "llvm/Analysis/ReleaseModeModelRunner.h"
26 #include "llvm/CodeGen/CalcSpillWeights.h"
27 #include "llvm/CodeGen/LiveRegMatrix.h"
28 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineLoopInfo.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/CodeGen/RegisterClassInfo.h"
34 #include "llvm/CodeGen/VirtRegMap.h"
35 #include "llvm/IR/Module.h"
36 #include "llvm/InitializePasses.h"
37 #include "llvm/Pass.h"
38 #include "llvm/PassRegistry.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/ErrorHandling.h"
41 
42 #include <array>
43 #include <bitset>
44 #include <memory>
45 #include <unordered_map>
46 
47 using namespace llvm;
48 
49 #define DEBUG_TYPE "ml-regalloc"
50 
51 // Generated header in release (AOT) mode
52 #if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
53 #include "RegAllocEvictModel.h"
54 using CompiledModelType = RegAllocEvictModel;
55 #else
56 using CompiledModelType = NoopSavedModelImpl;
57 #endif
58 
59 static cl::opt<std::string> InteractiveChannelBaseName(
60     "regalloc-evict-interactive-channel-base", cl::Hidden,
61     cl::desc(
62         "Base file path for the interactive mode. The incoming filename should "
63         "have the name <regalloc-evict-interactive-channel-base>.in, while the "
64         "outgoing name should be "
65         "<regalloc-evict-interactive-channel-base>.out"));
66 
67 static cl::opt<unsigned> MaxEvictionCount(
68     "mlregalloc-max-eviction-count", cl::Hidden,
69     cl::desc("The maximum number of times a live range can be "
70              "evicted before preventing it from being evicted"),
71     cl::init(100));
72 
73 // Options that only make sense in development mode
74 #ifdef LLVM_HAVE_TFLITE
75 #include "RegAllocScore.h"
76 #include "llvm/Analysis/Utils/TFUtils.h"
77 
78 static cl::opt<std::string> TrainingLog(
79     "regalloc-training-log", cl::Hidden,
80     cl::desc("Training log for the register allocator eviction model"));
81 
82 static cl::opt<std::string> ModelUnderTraining(
83     "regalloc-model", cl::Hidden,
84     cl::desc("The model being trained for register allocation eviction"));
85 
86 static cl::opt<bool> EnableDevelopmentFeatures(
87     "regalloc-enable-development-features", cl::Hidden,
88     cl::desc("Whether or not to enable features under development for the ML "
89              "regalloc advisor"));
90 
91 #else
92 static const bool EnableDevelopmentFeatures = false;
93 #endif // #ifdef LLVM_HAVE_TFLITE
94 
95 /// The score injection pass.
96 /// This pass calculates the score for a function and inserts it in the log, but
97 /// this happens only in development mode. It's a no-op otherwise.
98 namespace llvm {
99 extern cl::opt<unsigned> EvictInterferenceCutoff;
100 
101 class RegAllocScoring : public MachineFunctionPass {
102 public:
103   static char ID;
104 
RegAllocScoring()105   RegAllocScoring() : MachineFunctionPass(ID) {
106     initializeRegAllocScoringPass(*PassRegistry::getPassRegistry());
107   }
108 
109   ~RegAllocScoring() override = default;
110 
getPassName() const111   StringRef getPassName() const override {
112     return "Register Allocation Pass Scoring";
113   }
114 
115   /// RegAllocReward analysis usage.
getAnalysisUsage(AnalysisUsage & AU) const116   void getAnalysisUsage(AnalysisUsage &AU) const override {
117     AU.setPreservesAll();
118     AU.addRequired<RegAllocEvictionAdvisorAnalysisLegacy>();
119     AU.addRequired<RegAllocPriorityAdvisorAnalysisLegacy>();
120     AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
121     MachineFunctionPass::getAnalysisUsage(AU);
122   }
123 
124   /// Performs this pass
125   bool runOnMachineFunction(MachineFunction &) override;
126 };
127 
128 char RegAllocScoring::ID = 0;
createRegAllocScoringPass()129 FunctionPass *createRegAllocScoringPass() { return new RegAllocScoring(); }
130 
131 } // namespace llvm
132 
133 INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass",
134                 "Register Allocation Scoring Pass", false, false)
135 
136 // ===================================
137 // Common ML Advisor declarations
138 // ===================================
139 namespace {
140 // The model can only accept a specified number of opcodes and will error it if
141 // fed an opcode it hasn't seen before. This constant sets the current cutoff.
142 static const int OpcodeValueCutoff = 17716;
143 
144 // Most features are as described above, so we'll reuse this vector in defining
145 // them.
146 static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
147 
148 // --------------
149 // Features table
150 // --------------
151 // For each interfering live range (incl. the candidate) we collect a number of
152 // features. However, because the features are of different types (and because
153 // of ML best practices), we organize the tensors per feature, not per
154 // candidate. Each such tensor has a scalar value corresponding to the
155 // interferring live range at that position, in the order in AllocationOrder.
156 // The last position corresponds to the virt reg seeking allocation.
157 // Exception to all that is the progression feature, which is just a scalar (see
158 // its documentation for details).
159 // Note on naming: the "_by_max" are normalized using the largest value of that
160 // tensor, as observed in the current decision making stage (i.e. for the
161 // current call to the advisor's tryFindEvictionCandidate)
162 //
163 // The feature list format: type, name, shape, documentation.
164 // Note: we can really just use int64 and float, hence the modeling of some
165 // bools as int64 values.
166 #define RA_EVICT_FEATURES_LIST(M)                                              \
167   M(int64_t, mask, PerLiveRangeShape,                                          \
168     "boolean values, 0 for unavailable candidates (i.e. if a position is 0, "  \
169     "it "                                                                      \
170     "can't be evicted)")                                                       \
171   M(int64_t, is_free, PerLiveRangeShape,                                       \
172     "boolean values, 1 if this phys reg is actually free (no interferences)")  \
173   M(float, nr_urgent, PerLiveRangeShape,                                       \
174     "number of 'urgent' intervals, normalized. Urgent are those that are OK "  \
175     "to break cascades")                                                       \
176   M(float, nr_broken_hints, PerLiveRangeShape,                                 \
177     "if this position were evicted, how many broken hints would there be")     \
178   M(int64_t, is_hint, PerLiveRangeShape,                                       \
179     "is this a preferred phys reg for the candidate")                          \
180   M(int64_t, is_local, PerLiveRangeShape,                                      \
181     "is this live range local to a basic block")                               \
182   M(float, nr_rematerializable, PerLiveRangeShape,                             \
183     "nr rematerializable ranges")                                              \
184   M(float, nr_defs_and_uses, PerLiveRangeShape,                                \
185     "bb freq - weighed nr defs and uses")                                      \
186   M(float, weighed_reads_by_max, PerLiveRangeShape,                            \
187     "bb freq - weighed nr of reads, normalized")                               \
188   M(float, weighed_writes_by_max, PerLiveRangeShape,                           \
189     "bb feq - weighed nr of writes, normalized")                               \
190   M(float, weighed_read_writes_by_max, PerLiveRangeShape,                      \
191     "bb freq - weighed nr of uses that are both read and writes, normalized")  \
192   M(float, weighed_indvars_by_max, PerLiveRangeShape,                          \
193     "bb freq - weighed nr of uses that are indvars, normalized")               \
194   M(float, hint_weights_by_max, PerLiveRangeShape,                             \
195     "bb freq - weighed nr of uses that are hints, normalized")                 \
196   M(float, start_bb_freq_by_max, PerLiveRangeShape,                            \
197     "the freq in the start block, normalized")                                 \
198   M(float, end_bb_freq_by_max, PerLiveRangeShape,                              \
199     "freq of end block, normalized")                                           \
200   M(float, hottest_bb_freq_by_max, PerLiveRangeShape,                          \
201     "hottest BB freq, normalized")                                             \
202   M(float, liverange_size, PerLiveRangeShape,                                  \
203     "size (instr index diff) of the LR")                                       \
204   M(float, use_def_density, PerLiveRangeShape,                                 \
205     "the max weight, as computed by the manual heuristic")                     \
206   M(int64_t, max_stage, PerLiveRangeShape,                                     \
207     "largest stage of an interval in this LR")                                 \
208   M(int64_t, min_stage, PerLiveRangeShape,                                     \
209     "lowest stage of an interval in this LR")                                  \
210   M(float, progress, {1}, "ratio of current queue size to initial size")
211 
212 #ifdef LLVM_HAVE_TFLITE
213 #define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M)                                  \
214   M(int64_t, instructions, InstructionsShape,                                  \
215     "Opcodes of the instructions covered by the eviction problem")
216 
217 #define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)                                  \
218   M(int64_t, instructions_mapping, InstructionsMappingShape,                   \
219     "A binary matrix mapping LRs to instruction opcodes")                      \
220   M(float, mbb_frequencies, MBBFrequencyShape,                                 \
221     "A vector of machine basic block frequencies")                             \
222   M(int64_t, mbb_mapping, InstructionsShape,                                   \
223     "A vector of indices mapping instructions to MBBs")
224 #else
225 #define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M)
226 #define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)
227 #endif
228 
229 // The model learns to pick one of the mask == 1 interferences. This is the
230 // name of the output tensor. The contract with the model is that the output
231 // will be guaranteed to be to a mask == 1 position. Using a macro here to
232 // avoid 'not used' warnings (and keep cond compilation to a minimum)
233 #define DecisionName "index_to_evict"
234 static const TensorSpec DecisionSpec =
235     TensorSpec::createSpec<int64_t>(DecisionName, {1});
236 
237 // Named features index.
238 enum FeatureIDs {
239 #define _FEATURE_IDX_SIMPLE(_, name, __, ___) name
240 #define _FEATURE_IDX(A, B, C, D) _FEATURE_IDX_SIMPLE(A, B, C, D),
241   RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount,
242 #ifdef LLVM_HAVE_TFLITE
243   RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX_SIMPLE) = FeatureCount,
244 #else
245   RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX)
246 #endif // #ifdef LLVM_HAVE_TFLITE
247   RA_EVICT_REST_DEVELOPMENT_FEATURES(_FEATURE_IDX) FeaturesWithDevelopmentCount
248 #undef _FEATURE_IDX
249 #undef _FEATURE_IDX_SIMPLE
250 };
251 
252 // The ML advisor will typically have a sparse input to the evaluator, because
253 // various phys regs won't be available. It's easier (maintenance-wise) to
254 // bulk-reset the state of the evaluator each time we are about to use it
255 // again.
getTotalSize(const std::vector<int64_t> & Shape)256 template <typename T> size_t getTotalSize(const std::vector<int64_t> &Shape) {
257   size_t Ret = sizeof(T);
258   for (const auto V : Shape)
259     Ret *= V;
260   return Ret;
261 }
262 
resetInputs(MLModelRunner & Runner)263 void resetInputs(MLModelRunner &Runner) {
264 #define _RESET(TYPE, NAME, SHAPE, __)                                          \
265   std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0,                    \
266               getTotalSize<TYPE>(SHAPE));
267   RA_EVICT_FEATURES_LIST(_RESET)
268   if (EnableDevelopmentFeatures) {
269     RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_RESET)
270     RA_EVICT_REST_DEVELOPMENT_FEATURES(_RESET)
271 #undef _RESET
272   }
273 }
274 
275 // Per-live interval components that get aggregated into the feature values
276 // that will be passed to the evaluator.
277 struct LIFeatureComponents {
278   double R = 0;
279   double W = 0;
280   double RW = 0;
281   double IndVarUpdates = 0;
282   double HintWeights = 0.0;
283   int64_t NumDefsAndUses = 0;
284   float HottestBlockFreq = 0.0;
285   bool IsRemat = false;
286 };
287 
288 using CandidateRegList =
289     std::array<std::pair<MCRegister, bool>, NumberOfInterferences>;
290 using FeaturesListNormalizer =
291     llvm::SmallVector<float, FeatureIDs::FeatureCount>;
292 
293 /// The ML evictor (commonalities between release and development mode)
294 class MLEvictAdvisor : public RegAllocEvictionAdvisor {
295 public:
296   MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
297                  MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI,
298                  const MachineLoopInfo &Loops);
299 
300 protected:
getDefaultAdvisor() const301   const RegAllocEvictionAdvisor &getDefaultAdvisor() const {
302     return static_cast<const RegAllocEvictionAdvisor &>(DefaultAdvisor);
303   }
304 
305   // The assumption is that if the Runner could not be constructed, we emit-ed
306   // error, and we shouldn't be asking for it here.
getRunner() const307   const MLModelRunner &getRunner() const { return *Runner; }
308 
309   /// This just calls Evaluate on the Runner, but in the development mode
310   /// case, if we're just capturing the log of the default advisor, it needs
311   /// to call the latter instead, so we need to pass all the necessary
312   /// parameters for it. In the development case, it will also log.
313   virtual int64_t
314   tryFindEvictionCandidatePosition(const LiveInterval &VirtReg,
315                                    const AllocationOrder &Order,
316                                    unsigned OrderLimit, uint8_t CostPerUseLimit,
317                                    const SmallVirtRegSet &FixedRegisters) const;
318 
319   /// Load the features of the given VirtReg (allocated or not) at column Pos,
320   /// but if  that can't be evicted, return false instead.
321   bool
322   loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg,
323                            bool IsHint, const SmallVirtRegSet &FixedRegisters,
324                            llvm::SmallVectorImpl<float> &Largest, size_t Pos,
325                            SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const;
326 
327 private:
328   static float getInitialQueueSize(const MachineFunction &MF);
329 
330   MCRegister tryFindEvictionCandidate(
331       const LiveInterval &VirtReg, const AllocationOrder &Order,
332       uint8_t CostPerUseLimit,
333       const SmallVirtRegSet &FixedRegisters) const override;
334 
335   void extractFeatures(const SmallVectorImpl<const LiveInterval *> &Intervals,
336                        llvm::SmallVectorImpl<float> &Largest, size_t Pos,
337                        int64_t IsHint, int64_t LocalIntfsCount, float NumUrgent,
338                        SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const;
339 
340   // Point-in-time: we didn't learn this, so we always delegate to the
341   // default.
canEvictHintInterference(const LiveInterval & VirtReg,MCRegister PhysReg,const SmallVirtRegSet & FixedRegisters) const342   bool canEvictHintInterference(
343       const LiveInterval &VirtReg, MCRegister PhysReg,
344       const SmallVirtRegSet &FixedRegisters) const override {
345     return getDefaultAdvisor().canEvictHintInterference(VirtReg, PhysReg,
346                                                         FixedRegisters);
347   }
348 
349   const LIFeatureComponents &
350   getLIFeatureComponents(const LiveInterval &LI) const;
351 
352   // Hold on to a default advisor for:
353   // 1) the implementation of canEvictHintInterference, because we didn't
354   // learn that nuance yet; 2) for bootstrapping (logging) in the development
355   // mode case.
356   const DefaultEvictionAdvisor DefaultAdvisor;
357   MLModelRunner *const Runner;
358   const MachineBlockFrequencyInfo &MBFI;
359   const MachineLoopInfo &Loops;
360 
361   // Indices of those features we don't want to normalize.
362   // This could be static and shared, but its initialization is non-trivial.
363   std::bitset<FeatureIDs::FeatureCount> DoNotNormalize;
364   const float InitialQSize;
365 
366   using RegID = unsigned;
367   mutable DenseMap<RegID, LIFeatureComponents> CachedFeatures;
368 
369   mutable std::unordered_map<unsigned, unsigned> VirtRegEvictionCounts;
370 
onEviction(Register RegBeingEvicted) const371   void onEviction(Register RegBeingEvicted) const {
372     // If we cannot find the virtual register in the map, we just assume it has
373     // not been evicted before and thus has a value of zero (which is what the
374     // subscript operator returns by default).
375     ++VirtRegEvictionCounts[RegBeingEvicted.id()];
376   }
377 
getEvictionCount(Register Reg) const378   unsigned getEvictionCount(Register Reg) const {
379     auto EvictionCountIt = VirtRegEvictionCounts.find(Reg.id());
380     if (EvictionCountIt != VirtRegEvictionCounts.end())
381       return EvictionCountIt->second;
382     return 0;
383   }
384 };
385 
386 #define _DECL_FEATURES(type, name, shape, _)                                   \
387   TensorSpec::createSpec<type>(#name, shape),
388 
389 // ===================================
390 // Release (AOT) - specifics
391 // ===================================
392 /// Common provider for legacy and new pass managers.
393 class ReleaseModeEvictionAdvisorProvider final
394     : public RegAllocEvictionAdvisorProvider {
395 public:
ReleaseModeEvictionAdvisorProvider(LLVMContext & Ctx)396   ReleaseModeEvictionAdvisorProvider(LLVMContext &Ctx)
397       : RegAllocEvictionAdvisorProvider(AdvisorMode::Release, Ctx) {
398     if (EnableDevelopmentFeatures) {
399       InputFeatures = {RA_EVICT_FEATURES_LIST(
400           _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES)
401                            RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)};
402     } else {
403       InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
404     }
405   }
406   // support for isa<> and dyn_cast.
classof(const RegAllocEvictionAdvisorProvider * R)407   static bool classof(const RegAllocEvictionAdvisorProvider *R) {
408     return R->getAdvisorMode() == AdvisorMode::Release;
409   }
410 
411   std::unique_ptr<RegAllocEvictionAdvisor>
getAdvisor(const MachineFunction & MF,const RAGreedy & RA,MachineBlockFrequencyInfo * MBFI,MachineLoopInfo * Loops)412   getAdvisor(const MachineFunction &MF, const RAGreedy &RA,
413              MachineBlockFrequencyInfo *MBFI, MachineLoopInfo *Loops) override {
414     if (!Runner) {
415       if (InteractiveChannelBaseName.empty())
416         Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
417             MF.getFunction().getContext(), InputFeatures, DecisionName);
418       else
419         Runner = std::make_unique<InteractiveModelRunner>(
420             MF.getFunction().getContext(), InputFeatures, DecisionSpec,
421             InteractiveChannelBaseName + ".out",
422             InteractiveChannelBaseName + ".in");
423     }
424     assert(MBFI && Loops &&
425            "Invalid provider state: must have analysis available");
426     return std::make_unique<MLEvictAdvisor>(MF, RA, Runner.get(), *MBFI,
427                                             *Loops);
428   }
429 
430 private:
431   std::vector<TensorSpec> InputFeatures;
432   std::unique_ptr<MLModelRunner> Runner;
433 };
434 
435 class ReleaseModeEvictionAdvisorAnalysisLegacy final
436     : public RegAllocEvictionAdvisorAnalysisLegacy {
437 public:
ReleaseModeEvictionAdvisorAnalysisLegacy()438   ReleaseModeEvictionAdvisorAnalysisLegacy()
439       : RegAllocEvictionAdvisorAnalysisLegacy(AdvisorMode::Release) {}
440 
logRewardIfNeeded(const MachineFunction & MF,llvm::function_ref<float ()> GetReward)441   void logRewardIfNeeded(const MachineFunction &MF,
442                          llvm::function_ref<float()> GetReward) override {
443     // No-op in release mode
444   }
445 
doInitialization(Module & M)446   bool doInitialization(Module &M) override {
447     Provider =
448         std::make_unique<ReleaseModeEvictionAdvisorProvider>(M.getContext());
449     return false;
450   }
451 
classof(const RegAllocEvictionAdvisorAnalysisLegacy * R)452   static bool classof(const RegAllocEvictionAdvisorAnalysisLegacy *R) {
453     return R->getAdvisorMode() == AdvisorMode::Release;
454   }
455 
getAnalysisUsage(AnalysisUsage & AU) const456   void getAnalysisUsage(AnalysisUsage &AU) const override {
457     AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
458     AU.addRequired<MachineLoopInfoWrapperPass>();
459     RegAllocEvictionAdvisorAnalysisLegacy::getAnalysisUsage(AU);
460   }
461 };
462 
463 // ===================================
464 // Development mode-specifics
465 // ===================================
466 //
467 // Features we log
468 #ifdef LLVM_HAVE_TFLITE
469 static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
470 
471 // Features we bind on the model. The tensor names have a prefix, and we also
472 // need to include some tensors that are expected to be present by the
473 // training algo.
474 // TODO: can we just get rid of these?
475 #define _DECL_TRAIN_FEATURES(type, name, shape, _)                             \
476   TensorSpec::createSpec<type>(std::string("action_") + #name, shape),
477 
478 class DevelopmentModeEvictAdvisor : public MLEvictAdvisor {
479 public:
DevelopmentModeEvictAdvisor(const MachineFunction & MF,const RAGreedy & RA,MLModelRunner * Runner,const MachineBlockFrequencyInfo & MBFI,const MachineLoopInfo & Loops,Logger * Log)480   DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
481                               MLModelRunner *Runner,
482                               const MachineBlockFrequencyInfo &MBFI,
483                               const MachineLoopInfo &Loops, Logger *Log)
484       : MLEvictAdvisor(MF, RA, Runner, MBFI, Loops), Log(Log) {}
485 
486 private:
487   int64_t tryFindEvictionCandidatePosition(
488       const LiveInterval &VirtReg, const AllocationOrder &Order,
489       unsigned OrderLimit, uint8_t CostPerUseLimit,
490       const SmallVirtRegSet &FixedRegisters) const override;
491 
492   Logger *const Log;
493 };
494 
495 class DevelopmentModeEvictionAdvisorProvider final
496     : public RegAllocEvictionAdvisorProvider {
497 public:
DevelopmentModeEvictionAdvisorProvider(LLVMContext & Ctx)498   DevelopmentModeEvictionAdvisorProvider(LLVMContext &Ctx)
499       : RegAllocEvictionAdvisorProvider(AdvisorMode::Development, Ctx) {
500     if (EnableDevelopmentFeatures) {
501       InputFeatures = {RA_EVICT_FEATURES_LIST(
502           _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES)
503                            RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)};
504       TrainingInputFeatures = {
505           RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
506               RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_TRAIN_FEATURES)
507                   RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_TRAIN_FEATURES)
508                       TensorSpec::createSpec<float>("action_discount", {1}),
509           TensorSpec::createSpec<int32_t>("action_step_type", {1}),
510           TensorSpec::createSpec<float>("action_reward", {1})};
511     } else {
512       InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
513       TrainingInputFeatures = {
514           RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
515               TensorSpec::createSpec<float>("action_discount", {1}),
516           TensorSpec::createSpec<int32_t>("action_step_type", {1}),
517           TensorSpec::createSpec<float>("action_reward", {1})};
518     }
519     if (ModelUnderTraining.empty() && TrainingLog.empty()) {
520       Ctx.emitError("Regalloc development mode should be requested with at "
521                     "least logging enabled and/or a training model");
522       return;
523     }
524     if (ModelUnderTraining.empty())
525       Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures);
526     else
527       Runner = ModelUnderTrainingRunner::createAndEnsureValid(
528           Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures);
529     if (!Runner) {
530       Ctx.emitError("Regalloc: could not set up the model runner");
531       return;
532     }
533     if (TrainingLog.empty())
534       return;
535     std::error_code EC;
536     auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
537     if (EC) {
538       Ctx.emitError(EC.message() + ":" + TrainingLog);
539       return;
540     }
541     std::vector<TensorSpec> LFS = InputFeatures;
542     if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get()))
543       append_range(LFS, MUTR->extraOutputsForLoggingSpecs());
544     // We always log the output; in particular, if we're not evaluating, we
545     // don't have an output spec json file. That's why we handle the
546     // 'normal' output separately.
547     LFS.push_back(DecisionSpec);
548 
549     Log = std::make_unique<Logger>(std::move(OS), LFS, Reward,
550                                    /*IncludeReward*/ true);
551     return;
552   }
553 
554   // support for isa<> and dyn_cast.
classof(const RegAllocEvictionAdvisorProvider * R)555   static bool classof(const RegAllocEvictionAdvisorProvider *R) {
556     return R->getAdvisorMode() == AdvisorMode::Development;
557   }
558 
logRewardIfNeeded(const MachineFunction & MF,llvm::function_ref<float ()> GetReward)559   void logRewardIfNeeded(const MachineFunction &MF,
560                          llvm::function_ref<float()> GetReward) override {
561     if (!Log || !Log->hasAnyObservationForContext(MF.getName()))
562       return;
563     // The function pass manager would run all the function passes for a
564     // function, so we assume the last context belongs to this function. If
565     // this invariant ever changes, we can implement at that time switching
566     // contexts. At this point, it'd be an error
567     if (Log->currentContext() != MF.getName()) {
568       MF.getFunction().getContext().emitError(
569           "The training log context shouldn't have had changed.");
570     }
571     if (Log->hasObservationInProgress())
572       Log->logReward<float>(GetReward());
573   }
574 
575   std::unique_ptr<RegAllocEvictionAdvisor>
getAdvisor(const MachineFunction & MF,const RAGreedy & RA,MachineBlockFrequencyInfo * MBFI,MachineLoopInfo * Loops)576   getAdvisor(const MachineFunction &MF, const RAGreedy &RA,
577              MachineBlockFrequencyInfo *MBFI, MachineLoopInfo *Loops) override {
578     if (!Runner)
579       return nullptr;
580     if (Log)
581       Log->switchContext(MF.getName());
582     assert(MBFI && Loops &&
583            "Invalid provider state: must have analysis available");
584     return std::make_unique<DevelopmentModeEvictAdvisor>(
585         MF, RA, Runner.get(), *MBFI, *Loops, Log.get());
586   }
587 
588 private:
589   std::vector<TensorSpec> InputFeatures;
590   std::vector<TensorSpec> TrainingInputFeatures;
591 
592   std::unique_ptr<MLModelRunner> Runner;
593   std::unique_ptr<Logger> Log;
594 };
595 
596 class DevelopmentModeEvictionAdvisorAnalysisLegacy final
597     : public RegAllocEvictionAdvisorAnalysisLegacy {
598 public:
DevelopmentModeEvictionAdvisorAnalysisLegacy()599   DevelopmentModeEvictionAdvisorAnalysisLegacy()
600       : RegAllocEvictionAdvisorAnalysisLegacy(AdvisorMode::Development) {}
601 
doInitialization(Module & M)602   bool doInitialization(Module &M) override {
603     Provider = std::make_unique<DevelopmentModeEvictionAdvisorProvider>(
604         M.getContext());
605     return false;
606   }
607 
logRewardIfNeeded(const MachineFunction & MF,llvm::function_ref<float ()> GetReward)608   void logRewardIfNeeded(const MachineFunction &MF,
609                          llvm::function_ref<float()> GetReward) override {
610     Provider->logRewardIfNeeded(MF, GetReward);
611   }
612 
613   // support for isa<> and dyn_cast.
classof(const RegAllocEvictionAdvisorAnalysisLegacy * R)614   static bool classof(const RegAllocEvictionAdvisorAnalysisLegacy *R) {
615     return R->getAdvisorMode() == AdvisorMode::Development;
616   }
617 
getAnalysisUsage(AnalysisUsage & AU) const618   void getAnalysisUsage(AnalysisUsage &AU) const override {
619     AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
620     AU.addRequired<MachineLoopInfoWrapperPass>();
621     RegAllocEvictionAdvisorAnalysisLegacy::getAnalysisUsage(AU);
622   }
623 };
624 
625 #endif // #ifdef LLVM_HAVE_TFLITE
626 } // namespace
627 
getInitialQueueSize(const MachineFunction & MF)628 float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) {
629   auto &MRI = MF.getRegInfo();
630   unsigned NumUsedRegs = 0;
631   for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
632     Register Reg = Register::index2VirtReg(I);
633     if (!MRI.reg_nodbg_empty(Reg))
634       ++NumUsedRegs;
635   }
636   return static_cast<float>(NumUsedRegs);
637 }
638 
MLEvictAdvisor(const MachineFunction & MF,const RAGreedy & RA,MLModelRunner * Runner,const MachineBlockFrequencyInfo & MBFI,const MachineLoopInfo & Loops)639 MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
640                                MLModelRunner *Runner,
641                                const MachineBlockFrequencyInfo &MBFI,
642                                const MachineLoopInfo &Loops)
643     : RegAllocEvictionAdvisor(MF, RA), DefaultAdvisor(MF, RA),
644       Runner(std::move(Runner)), MBFI(MBFI), Loops(Loops),
645       InitialQSize(MLEvictAdvisor::getInitialQueueSize(MF)) {
646   assert(this->Runner);
647   Runner->switchContext(MF.getName());
648   DoNotNormalize.set(FeatureIDs::mask);
649   DoNotNormalize.set(FeatureIDs::is_free);
650   DoNotNormalize.set(FeatureIDs::is_hint);
651   DoNotNormalize.set(FeatureIDs::is_local);
652   DoNotNormalize.set(FeatureIDs::min_stage);
653   DoNotNormalize.set(FeatureIDs::max_stage);
654   DoNotNormalize.set(FeatureIDs::progress);
655 }
656 
tryFindEvictionCandidatePosition(const LiveInterval &,const AllocationOrder &,unsigned,uint8_t,const SmallVirtRegSet &) const657 int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
658     const LiveInterval &, const AllocationOrder &, unsigned, uint8_t,
659     const SmallVirtRegSet &) const {
660   int64_t Ret = Runner->evaluate<int64_t>();
661   assert(Ret >= 0);
662   assert(Ret <= CandidateVirtRegPos);
663   return Ret;
664 }
665 
loadInterferenceFeatures(const LiveInterval & VirtReg,MCRegister PhysReg,bool IsHint,const SmallVirtRegSet & FixedRegisters,llvm::SmallVectorImpl<float> & Largest,size_t Pos,llvm::SmallVectorImpl<LRStartEndInfo> & LRPosInfo) const666 bool MLEvictAdvisor::loadInterferenceFeatures(
667     const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
668     const SmallVirtRegSet &FixedRegisters,
669     llvm::SmallVectorImpl<float> &Largest, size_t Pos,
670     llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const {
671   // It is only possible to evict virtual register interference.
672   if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) {
673     // leave unavailable
674     return false;
675   }
676 
677   const bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);
678   int64_t LocalIntfs = 0;
679   float NumUrgent = 0.0f;
680 
681   // The cascade tracking is the same as in the default advisor
682   unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
683 
684   SmallVector<const LiveInterval *, MaxInterferences> InterferingIntervals;
685   for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
686     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
687     // Different from the default heuristic, we don't make any assumptions
688     // about what having more than 10 results in the query may mean.
689     const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff);
690     if (IFIntervals.empty() && InterferingIntervals.empty())
691       continue;
692     if (IFIntervals.size() >= EvictInterferenceCutoff)
693       return false;
694     InterferingIntervals.append(IFIntervals.begin(), IFIntervals.end());
695     for (const LiveInterval *Intf : reverse(IFIntervals)) {
696       assert(Intf->reg().isVirtual() &&
697              "Only expecting virtual register interference from query");
698       // This is the same set of legality checks as in the default case: don't
699       // try to evict fixed regs or 'done' ones. Also don't break cascades,
700       // except in the urgent case, with the same nuances used in the default
701       // heuristic.
702       // We could try sharing this between the advisors, but it may end up
703       // more complex than it is right now.
704       if (FixedRegisters.count(Intf->reg()))
705         return false;
706       if (RA.getExtraInfo().getStage(*Intf) == RS_Done)
707         return false;
708       bool Urgent =
709           !VirtReg.isSpillable() &&
710           (Intf->isSpillable() ||
711            RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) <
712                RegClassInfo.getNumAllocatableRegs(
713                    MRI->getRegClass(Intf->reg())));
714 
715       unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
716       // There is a potential that the model could be adversarial and
717       // continually evict live ranges over and over again, leading to a
718       // large amount of compile time being spent in regalloc. If we hit the
719       // threshold, prevent the range from being evicted. We still let the
720       // range through if it is urgent as we are required to produce an
721       // eviction if the candidate is not spillable.
722       if (getEvictionCount(Intf->reg()) > MaxEvictionCount && !Urgent)
723         return false;
724 
725       // Only evict older cascades or live ranges without a cascade.
726       if (Cascade <= IntfCascade) {
727         if (!Urgent)
728           return false;
729         ++NumUrgent;
730       }
731 
732       LocalIntfs += (IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
733                      (!EnableLocalReassign || !canReassign(*Intf, PhysReg)));
734     }
735   }
736   // OK, so if we made it this far, this LR is an eviction candidate, load its
737   // features.
738   extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs,
739                   NumUrgent, LRPosInfo);
740   return true;
741 }
742 
tryFindEvictionCandidate(const LiveInterval & VirtReg,const AllocationOrder & Order,uint8_t CostPerUseLimit,const SmallVirtRegSet & FixedRegisters) const743 MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
744     const LiveInterval &VirtReg, const AllocationOrder &Order,
745     uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
746   auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
747   if (!MaybeOrderLimit)
748     return MCRegister::NoRegister;
749   unsigned OrderLimit = *MaybeOrderLimit;
750 
751   // The heuristic sets initial costs such as, if CostPerUseLimit is
752   // max<uint8_t>, then any of the costs of the legally-evictable intervals
753   // would be lower. When that happens, one of those will be selected.
754   // Therefore, we allow the candidate be selected, unless the candidate is
755   // unspillable, in which case it would be incorrect to not find a register
756   // for it.
757   const bool MustFindEviction =
758       (!VirtReg.isSpillable() && CostPerUseLimit == static_cast<uint8_t>(~0u));
759   // Number of available candidates - if 0, no need to continue.
760   size_t Available = 0;
761   // Make sure we don't have leftover partial state from an attempt where we
762   // had no available candidates and bailed out early.
763   resetInputs(*Runner);
764 
765   // Track the index->register mapping because AllocationOrder doesn't do that
766   // and we'd have to scan it.
767   // Also track their mask, to write asserts/debug.
768   CandidateRegList Regs;
769   Regs.fill({0, false});
770 
771   // Track the largest value of features seen during this eviction session. We
772   // only normalize (some of) the float features, but it's just simpler to
773   // dimension 'Largest' to all the features, especially since we have the
774   // 'DoNotNormalize' list.
775   FeaturesListNormalizer Largest(FeatureIDs::FeatureCount, 0.0);
776 
777   // Same overal idea as in the default eviction policy - we visit the values
778   // of AllocationOrder one at a time. If it's not legally available, we mask
779   // off the corresponding feature column (==do nothing because we already
780   // reset all the features to 0) Use Pos to capture the column we load
781   // features at - in AllocationOrder order.
782   size_t Pos = 0;
783   SmallVector<LRStartEndInfo, NumberOfInterferences> LRPosInfo;
784   for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
785        ++I, ++Pos) {
786     MCRegister PhysReg = *I;
787     assert(!Regs[Pos].second);
788     assert(PhysReg);
789     if (!canAllocatePhysReg(CostPerUseLimit, PhysReg)) {
790       continue;
791     }
792     if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters,
793                                  Largest, Pos, LRPosInfo)) {
794       ++Available;
795       Regs[Pos] = std::make_pair(PhysReg, true);
796     }
797   }
798   if (Available == 0) {
799     // Nothing to decide, nothing to learn.
800     assert(!MustFindEviction);
801     return MCRegister::NoRegister;
802   }
803   const size_t ValidPosLimit = Pos;
804   // If we must find eviction, the candidate should be masked out of the
805   // decision making process.
806   Regs[CandidateVirtRegPos].second = !MustFindEviction;
807   if (!MustFindEviction)
808     extractFeatures(SmallVector<const LiveInterval *, 1>(1, &VirtReg), Largest,
809                     CandidateVirtRegPos, /*IsHint*/ 0,
810                     /*LocalIntfsCount*/ 0,
811                     /*NumUrgent*/ 0.0, LRPosInfo);
812   assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had "
813                                "nothing to allocate initially.");
814 #ifdef LLVM_HAVE_TFLITE
815   if (EnableDevelopmentFeatures) {
816     extractInstructionFeatures(
817         LRPosInfo, Runner,
818         [this](SlotIndex InputIndex) -> int {
819           auto *CurrentMachineInstruction =
820               LIS->getInstructionFromIndex(InputIndex);
821           if (!CurrentMachineInstruction) {
822             return -1;
823           }
824           return CurrentMachineInstruction->getOpcode();
825         },
826         [this](SlotIndex InputIndex) -> float {
827           auto *CurrentMachineInstruction =
828               LIS->getInstructionFromIndex(InputIndex);
829           return MBFI.getBlockFreqRelativeToEntryBlock(
830               CurrentMachineInstruction->getParent());
831         },
832         [this](SlotIndex InputIndex) -> MachineBasicBlock * {
833           auto *CurrentMachineInstruction =
834               LIS->getInstructionFromIndex(InputIndex);
835           return CurrentMachineInstruction->getParent();
836         },
837         FeatureIDs::instructions, FeatureIDs::instructions_mapping,
838         FeatureIDs::mbb_frequencies, FeatureIDs::mbb_mapping,
839         LIS->getSlotIndexes()->getLastIndex());
840   }
841 #endif // #ifdef LLVM_HAVE_TFLITE
842   // Normalize the features.
843   for (auto &V : Largest)
844     V = V ? V : 1.0;
845   for (size_t FeatureIndex = 0; FeatureIndex < FeatureIDs::FeatureCount;
846        ++FeatureIndex) {
847     if (DoNotNormalize.test(FeatureIndex))
848       continue;
849     for (size_t Pos = 0; Pos < NumberOfInterferences; ++Pos) {
850       Runner->getTensor<float>(FeatureIndex)[Pos] /= Largest[FeatureIndex];
851     }
852   }
853   *Runner->getTensor<float>(FeatureIDs::progress) =
854       static_cast<float>(RA.getQueueSize()) / InitialQSize;
855 
856   // Get a decision.
857   size_t CandidatePos = tryFindEvictionCandidatePosition(
858       VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters);
859   // The contract with the ML side is that CandidatePos is mask == 1 (i.e.
860   // Regs[CandidatePos].second)
861   assert(Regs[CandidatePos].second);
862   if (CandidatePos == CandidateVirtRegPos) {
863     onEviction(VirtReg.reg());
864     assert(!MustFindEviction);
865     return MCRegister::NoRegister;
866   }
867   assert(CandidatePos < ValidPosLimit);
868   (void)ValidPosLimit;
869 
870   // Update information about how many times the virtual registers being
871   // evicted have been evicted so that we can prevent the model from evicting
872   // the same ranges continually and eating compile time.
873   for (MCRegUnit Unit : TRI->regunits(Regs[CandidatePos].first)) {
874     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
875     const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff);
876     for (const LiveInterval *Intf : reverse(IFIntervals)) {
877       onEviction(Intf->reg());
878     }
879   }
880 
881   return Regs[CandidatePos].first;
882 }
883 
884 const LIFeatureComponents &
getLIFeatureComponents(const LiveInterval & LI) const885 MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
886   RegID ID = LI.reg().id();
887   LIFeatureComponents Empty;
888   auto I = CachedFeatures.insert(std::make_pair(ID, Empty));
889   LIFeatureComponents &Ret = I.first->getSecond();
890   if (!I.second)
891     return Ret;
892 
893   SmallPtrSet<MachineInstr *, 8> Visited;
894   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
895 
896   for (MachineRegisterInfo::reg_instr_nodbg_iterator
897            I = MRI->reg_instr_nodbg_begin(LI.reg()),
898            E = MRI->reg_instr_nodbg_end();
899        I != E;) {
900     MachineInstr *MI = &*(I++);
901 
902     ++Ret.NumDefsAndUses;
903     if (!Visited.insert(MI).second)
904       continue;
905 
906     if (MI->isIdentityCopy() || MI->isImplicitDef())
907       continue;
908 
909     bool Reads, Writes;
910     std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
911 
912     float Freq = MBFI.getBlockFreqRelativeToEntryBlock(MI->getParent());
913     Ret.HottestBlockFreq = std::max(Freq, Ret.HottestBlockFreq);
914 
915     Ret.R += (Reads && !Writes) * Freq;
916     Ret.W += (!Reads && Writes) * Freq;
917     Ret.RW += (Reads && Writes) * Freq;
918 
919     auto *MBB = MI->getParent();
920     auto *Loop = Loops.getLoopFor(MBB);
921     bool IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
922 
923     if (Writes && IsExiting && LIS->isLiveOutOfMBB(LI, MBB))
924       Ret.IndVarUpdates += Freq;
925 
926     if (MI->isCopy() && VirtRegAuxInfo::copyHint(MI, LI.reg(), TRI, *MRI))
927       Ret.HintWeights += Freq;
928   }
929   Ret.IsRemat = VirtRegAuxInfo::isRematerializable(
930       LI, *LIS, *VRM, *MF.getSubtarget().getInstrInfo());
931   return Ret;
932 }
933 
934 // Overall, this currently mimics what we do for weight calculation, but instead
935 // of accummulating the various features, we keep them separate.
extractFeatures(const SmallVectorImpl<const LiveInterval * > & Intervals,llvm::SmallVectorImpl<float> & Largest,size_t Pos,int64_t IsHint,int64_t LocalIntfsCount,float NumUrgent,SmallVectorImpl<LRStartEndInfo> & LRPosInfo) const936 void MLEvictAdvisor::extractFeatures(
937     const SmallVectorImpl<const LiveInterval *> &Intervals,
938     llvm::SmallVectorImpl<float> &Largest, size_t Pos, int64_t IsHint,
939     int64_t LocalIntfsCount, float NumUrgent,
940     SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const {
941   int64_t NumDefsAndUses = 0;
942   int64_t NumBrokenHints = 0;
943   double R = 0.0;
944   double W = 0.0;
945   double RW = 0.0;
946   double IndVarUpdates = 0.0;
947   double HintWeights = 0.0;
948   float StartBBFreq = 0.0;
949   float EndBBFreq = 0.0;
950   float HottestBlockFreq = 0.0;
951   int32_t NumRematerializable = 0;
952   float TotalWeight = 0.0;
953 
954   SlotIndex EndSI = LIS->getSlotIndexes()->getZeroIndex();
955   SlotIndex StartSI = LIS->getSlotIndexes()->getLastIndex();
956   int64_t MaxStage = 0;
957   int64_t MinStage =
958       Intervals.empty() ? 0 : std::numeric_limits<int64_t>::max();
959 
960   for (const auto *L : Intervals) {
961     const LiveInterval &LI = *L;
962     MaxStage = std::max<int64_t>(
963         MaxStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI)));
964     MinStage = std::min<int64_t>(
965         MinStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI)));
966 
967     TotalWeight = std::max(TotalWeight, LI.weight());
968 
969     if (LI.beginIndex() < StartSI)
970       StartSI = LI.beginIndex();
971 
972     if (LI.endIndex() > EndSI)
973       EndSI = LI.endIndex();
974     const LIFeatureComponents &LIFC = getLIFeatureComponents(LI);
975     NumBrokenHints += VRM->hasPreferredPhys(LI.reg());
976 
977     NumDefsAndUses += LIFC.NumDefsAndUses;
978     HottestBlockFreq = std::max(HottestBlockFreq, LIFC.HottestBlockFreq);
979     R += LIFC.R;
980     W += LIFC.W;
981     RW += LIFC.RW;
982 
983     IndVarUpdates += LIFC.IndVarUpdates;
984 
985     HintWeights += LIFC.HintWeights;
986     NumRematerializable += LIFC.IsRemat;
987 
988     if (EnableDevelopmentFeatures) {
989       for (auto CurrentSegment : LI) {
990         LRPosInfo.push_back(
991             LRStartEndInfo{CurrentSegment.start, CurrentSegment.end, Pos});
992       }
993     }
994   }
995   size_t Size = 0;
996   if (!Intervals.empty()) {
997     StartBBFreq =
998         MBFI.getBlockFreqRelativeToEntryBlock(LIS->getMBBFromIndex(StartSI));
999     if (EndSI >= LIS->getSlotIndexes()->getLastIndex())
1000       EndSI = LIS->getSlotIndexes()->getLastIndex().getPrevIndex();
1001     EndBBFreq =
1002         MBFI.getBlockFreqRelativeToEntryBlock(LIS->getMBBFromIndex(EndSI));
1003     Size = StartSI.distance(EndSI);
1004   }
1005   // Set the features at the column 'Pos'.
1006 #define SET(ID, TYPE, VAL)                                                     \
1007   do {                                                                         \
1008     Runner->getTensor<TYPE>(FeatureIDs::ID)[Pos] = static_cast<TYPE>(VAL);     \
1009     if (!DoNotNormalize.test(FeatureIDs::ID))                                  \
1010       Largest[FeatureIDs::ID] =                                                \
1011           std::max(Largest[FeatureIDs::ID], static_cast<float>(VAL));          \
1012   } while (false)
1013   SET(mask, int64_t, 1);
1014   SET(is_free, int64_t, Intervals.empty());
1015   SET(nr_urgent, float, NumUrgent);
1016   SET(nr_broken_hints, float, NumBrokenHints);
1017   SET(is_hint, int64_t, IsHint);
1018   SET(is_local, int64_t, LocalIntfsCount);
1019   SET(nr_rematerializable, float, NumRematerializable);
1020   SET(nr_defs_and_uses, float, NumDefsAndUses);
1021   SET(weighed_reads_by_max, float, R);
1022   SET(weighed_writes_by_max, float, W);
1023   SET(weighed_read_writes_by_max, float, RW);
1024   SET(weighed_indvars_by_max, float, IndVarUpdates);
1025   SET(hint_weights_by_max, float, HintWeights);
1026   SET(start_bb_freq_by_max, float, StartBBFreq);
1027   SET(end_bb_freq_by_max, float, EndBBFreq);
1028   SET(hottest_bb_freq_by_max, float, HottestBlockFreq);
1029   SET(liverange_size, float, Size);
1030   SET(use_def_density, float, TotalWeight);
1031   SET(max_stage, int64_t, MaxStage);
1032   SET(min_stage, int64_t, MinStage);
1033 #undef SET
1034 }
1035 
extractInstructionFeatures(SmallVectorImpl<LRStartEndInfo> & LRPosInfo,MLModelRunner * RegallocRunner,function_ref<int (SlotIndex)> GetOpcode,function_ref<float (SlotIndex)> GetMBBFreq,function_ref<MachineBasicBlock * (SlotIndex)> GetMBBReference,const int InstructionsIndex,const int InstructionsMappingIndex,const int MBBFreqIndex,const int MBBMappingIndex,const SlotIndex LastIndex)1036 void llvm::extractInstructionFeatures(
1037     SmallVectorImpl<LRStartEndInfo> &LRPosInfo, MLModelRunner *RegallocRunner,
1038     function_ref<int(SlotIndex)> GetOpcode,
1039     function_ref<float(SlotIndex)> GetMBBFreq,
1040     function_ref<MachineBasicBlock *(SlotIndex)> GetMBBReference,
1041     const int InstructionsIndex, const int InstructionsMappingIndex,
1042     const int MBBFreqIndex, const int MBBMappingIndex,
1043     const SlotIndex LastIndex) {
1044   // This function extracts instruction based features relevant to the eviction
1045   // problem currently being solved. This function ends up extracting two
1046   // tensors.
1047   // 1 - A vector of size max instruction count. It contains the opcodes of the
1048   // instructions spanned by all the intervals in the current instance of the
1049   // eviction problem.
1050   // 2 - A binary mapping matrix of size (LR count * max
1051   // instruction count) which maps where the LRs are live to the actual opcodes
1052   // for which they are live.
1053   // 3 - A vector of size max supported MBB count storing MBB frequencies,
1054   // encompassing all of the MBBs covered by the eviction problem.
1055   // 4 - A vector of size max instruction count of indices to members of the MBB
1056   // frequency vector, mapping each instruction to its associated MBB.
1057 
1058   // Start off by sorting the segments based on the beginning slot index.
1059   std::sort(
1060       LRPosInfo.begin(), LRPosInfo.end(),
1061       [](LRStartEndInfo A, LRStartEndInfo B) { return A.Begin < B.Begin; });
1062   size_t InstructionIndex = 0;
1063   size_t CurrentSegmentIndex = 0;
1064   SlotIndex CurrentIndex = LRPosInfo[0].Begin;
1065   std::map<MachineBasicBlock *, size_t> VisitedMBBs;
1066   size_t CurrentMBBIndex = 0;
1067   // This loop processes all the segments sequentially by starting at the
1068   // beginning slot index of the first segment, iterating through all the slot
1069   // indices before the end slot index of that segment (while checking for
1070   // overlaps with segments that start at greater slot indices). After hitting
1071   // that end index, the current segment being processed gets bumped until they
1072   // are all processed or the max instruction count is hit, where everything is
1073   // just truncated.
1074   while (true) {
1075     // If the index that we are currently at is within the current segment and
1076     // we haven't hit the max instruction count, continue processing the current
1077     // segment.
1078     while (CurrentIndex <= LRPosInfo[CurrentSegmentIndex].End &&
1079            InstructionIndex < ModelMaxSupportedInstructionCount) {
1080       int CurrentOpcode = GetOpcode(CurrentIndex);
1081       // If the current machine instruction is null, skip it
1082       if (CurrentOpcode == -1) {
1083         // If we're currently at the last index in the SlotIndex analysis,
1084         // we can't go any further, so return from the function
1085         if (CurrentIndex >= LastIndex) {
1086           return;
1087         }
1088         CurrentIndex = CurrentIndex.getNextIndex();
1089         continue;
1090       }
1091       MachineBasicBlock *CurrentMBBReference = GetMBBReference(CurrentIndex);
1092       if (VisitedMBBs.count(CurrentMBBReference) == 0) {
1093         VisitedMBBs[CurrentMBBReference] = CurrentMBBIndex;
1094         ++CurrentMBBIndex;
1095       }
1096       extractMBBFrequency(CurrentIndex, InstructionIndex, VisitedMBBs,
1097                           GetMBBFreq, CurrentMBBReference, RegallocRunner,
1098                           MBBFreqIndex, MBBMappingIndex);
1099       // Current code assumes we're not going to get any disjointed segments
1100       assert(LRPosInfo[CurrentSegmentIndex].Begin <= CurrentIndex);
1101       RegallocRunner->getTensor<int64_t>(InstructionsIndex)[InstructionIndex] =
1102           CurrentOpcode < OpcodeValueCutoff ? CurrentOpcode : 0;
1103       // set value in the binary mapping matrix for the current instruction
1104       auto CurrentSegmentPosition = LRPosInfo[CurrentSegmentIndex].Pos;
1105       RegallocRunner->getTensor<int64_t>(
1106           InstructionsMappingIndex)[CurrentSegmentPosition *
1107                                         ModelMaxSupportedInstructionCount +
1108                                     InstructionIndex] = 1;
1109       // All of the segments are sorted based on the beginning slot index, but
1110       // this doesn't mean that the beginning slot index of the next segment is
1111       // after the end segment of the one being currently processed. This while
1112       // loop checks for overlapping segments and modifies the portion of the
1113       // column in the mapping matrix for the currently processed instruction
1114       // for the LR it is checking. Also make sure that the beginning of the
1115       // current segment we're checking for overlap in is less than the current
1116       // index, otherwise we're done checking overlaps.
1117       size_t OverlapCheckCurrentSegment = CurrentSegmentIndex + 1;
1118       while (OverlapCheckCurrentSegment < LRPosInfo.size() &&
1119              LRPosInfo[OverlapCheckCurrentSegment].Begin <= CurrentIndex) {
1120         auto OverlapCurrentSegmentPosition =
1121             LRPosInfo[OverlapCheckCurrentSegment].Pos;
1122         if (LRPosInfo[OverlapCheckCurrentSegment].End >= CurrentIndex) {
1123           RegallocRunner->getTensor<int64_t>(
1124               InstructionsMappingIndex)[OverlapCurrentSegmentPosition *
1125                                             ModelMaxSupportedInstructionCount +
1126                                         InstructionIndex] = 1;
1127         }
1128         ++OverlapCheckCurrentSegment;
1129       }
1130       ++InstructionIndex;
1131       if (CurrentIndex >= LastIndex) {
1132         return;
1133       }
1134       CurrentIndex = CurrentIndex.getNextIndex();
1135     }
1136     // if we've just finished processing through the last segment or if we've
1137     // hit the maximum number of instructions, break out of the loop.
1138     if (CurrentSegmentIndex == LRPosInfo.size() - 1 ||
1139         InstructionIndex >= ModelMaxSupportedInstructionCount) {
1140       break;
1141     }
1142     // If the segments are not overlapping, we need to move to the beginning
1143     // index of the next segment to avoid having instructions not attached to
1144     // any register.
1145     if (LRPosInfo[CurrentSegmentIndex + 1].Begin >
1146         LRPosInfo[CurrentSegmentIndex].End) {
1147       CurrentIndex = LRPosInfo[CurrentSegmentIndex + 1].Begin;
1148     }
1149     ++CurrentSegmentIndex;
1150   }
1151 }
1152 
extractMBBFrequency(const SlotIndex CurrentIndex,const size_t CurrentInstructionIndex,std::map<MachineBasicBlock *,size_t> & VisitedMBBs,function_ref<float (SlotIndex)> GetMBBFreq,MachineBasicBlock * CurrentMBBReference,MLModelRunner * RegallocRunner,const int MBBFreqIndex,const int MBBMappingIndex)1153 void llvm::extractMBBFrequency(
1154     const SlotIndex CurrentIndex, const size_t CurrentInstructionIndex,
1155     std::map<MachineBasicBlock *, size_t> &VisitedMBBs,
1156     function_ref<float(SlotIndex)> GetMBBFreq,
1157     MachineBasicBlock *CurrentMBBReference, MLModelRunner *RegallocRunner,
1158     const int MBBFreqIndex, const int MBBMappingIndex) {
1159   size_t CurrentMBBIndex = VisitedMBBs[CurrentMBBReference];
1160   float CurrentMBBFreq = GetMBBFreq(CurrentIndex);
1161   if (CurrentMBBIndex < ModelMaxSupportedMBBCount) {
1162     RegallocRunner->getTensor<float>(MBBFreqIndex)[CurrentMBBIndex] =
1163         CurrentMBBFreq;
1164     RegallocRunner->getTensor<int64_t>(
1165         MBBMappingIndex)[CurrentInstructionIndex] = CurrentMBBIndex;
1166   }
1167 }
1168 
1169 // Development mode-specific implementations
1170 #ifdef LLVM_HAVE_TFLITE
1171 
1172 RegAllocEvictionAdvisorAnalysisLegacy *
createDevelopmentModeAdvisorAnalysisLegacy()1173 llvm::createDevelopmentModeAdvisorAnalysisLegacy() {
1174   return new DevelopmentModeEvictionAdvisorAnalysisLegacy();
1175 }
1176 
tryFindEvictionCandidatePosition(const LiveInterval & VirtReg,const AllocationOrder & Order,unsigned OrderLimit,uint8_t CostPerUseLimit,const SmallVirtRegSet & FixedRegisters) const1177 int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition(
1178     const LiveInterval &VirtReg, const AllocationOrder &Order,
1179     unsigned OrderLimit, uint8_t CostPerUseLimit,
1180     const SmallVirtRegSet &FixedRegisters) const {
1181   int64_t Ret = 0;
1182   if (isa<ModelUnderTrainingRunner>(getRunner())) {
1183     Ret = MLEvictAdvisor::tryFindEvictionCandidatePosition(
1184         VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters);
1185   } else {
1186     MCRegister PhysReg = getDefaultAdvisor().tryFindEvictionCandidate(
1187         VirtReg, Order, CostPerUseLimit, FixedRegisters);
1188     // Find the index of the selected PhysReg. We need it for logging,
1189     // otherwise this is wasted cycles (but so would starting development mode
1190     // without a model nor logging)
1191     if (!PhysReg)
1192       Ret = CandidateVirtRegPos;
1193     else
1194       for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit);
1195            I != E; ++I, ++Ret)
1196         if (*I == PhysReg)
1197           break;
1198   }
1199   if (TrainingLog.empty())
1200     return Ret;
1201   // TODO(mtrofin): when we support optional rewards, this can go away. In the
1202   // meantime, we log the "pretend" reward (0) for the previous observation
1203   // before starting a new one.
1204   if (Log->hasObservationInProgress())
1205     Log->logReward<float>(0.0);
1206 
1207   Log->startObservation();
1208   size_t CurrentFeature = 0;
1209   size_t FeatureCount = EnableDevelopmentFeatures
1210                             ? FeatureIDs::FeaturesWithDevelopmentCount
1211                             : FeatureIDs::FeatureCount;
1212   for (; CurrentFeature < FeatureCount; ++CurrentFeature) {
1213     Log->logTensorValue(CurrentFeature,
1214                         reinterpret_cast<const char *>(
1215                             getRunner().getTensorUntyped(CurrentFeature)));
1216   }
1217   if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner()))
1218     for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size();
1219          ++I, ++CurrentFeature)
1220       Log->logTensorValue(
1221           CurrentFeature,
1222           reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)));
1223   // The output is right after the features and the extra outputs
1224   Log->logTensorValue(CurrentFeature, reinterpret_cast<const char *>(&Ret));
1225   Log->endObservation();
1226   return Ret;
1227 }
1228 
runOnMachineFunction(MachineFunction & MF)1229 bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
1230   std::optional<float> CachedReward;
1231   auto GetReward = [&]() {
1232     if (!CachedReward)
1233       CachedReward = static_cast<float>(
1234           calculateRegAllocScore(
1235               MF, getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI())
1236               .getScore());
1237     return *CachedReward;
1238   };
1239 
1240   getAnalysis<RegAllocEvictionAdvisorAnalysisLegacy>().logRewardIfNeeded(
1241       MF, GetReward);
1242   getAnalysis<RegAllocPriorityAdvisorAnalysisLegacy>().logRewardIfNeeded(
1243       MF, GetReward);
1244   return false;
1245 }
1246 #endif // #ifdef LLVM_HAVE_TFLITE
1247 
1248 RegAllocEvictionAdvisorProvider *
createReleaseModeAdvisorProvider(LLVMContext & Ctx)1249 llvm::createReleaseModeAdvisorProvider(LLVMContext &Ctx) {
1250   return new ReleaseModeEvictionAdvisorProvider(Ctx);
1251 }
1252 
1253 RegAllocEvictionAdvisorProvider *
createDevelopmentModeAdvisorProvider(LLVMContext & Ctx)1254 llvm::createDevelopmentModeAdvisorProvider(LLVMContext &Ctx) {
1255 #if defined(LLVM_HAVE_TFLITE)
1256   return new DevelopmentModeEvictionAdvisorProvider(Ctx);
1257 #endif
1258   return nullptr;
1259 }
1260 
1261 RegAllocEvictionAdvisorAnalysisLegacy *
createReleaseModeAdvisorAnalysisLegacy()1262 llvm::createReleaseModeAdvisorAnalysisLegacy() {
1263   return llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() ||
1264                  !InteractiveChannelBaseName.empty()
1265              ? new ReleaseModeEvictionAdvisorAnalysisLegacy()
1266              : nullptr;
1267 }
1268 
1269 // In all cases except development mode, we don't need scoring.
1270 #if !defined(LLVM_HAVE_TFLITE)
runOnMachineFunction(MachineFunction &)1271 bool RegAllocScoring::runOnMachineFunction(MachineFunction &) { return false; }
1272 #endif
1273