//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner  --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using TFLite, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/TensorSpec.h"
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TFLITE)

#include "llvm/ADT/BitVector.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/Analysis/Utils/TrainingLogger.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <optional>
#include <vector>
using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc(R"(Path to SavedModel from the previous training iteration.
The directory is also expected to contain a JSON specification of the
outputs expected to be logged, where the first entry must be the
inlining decision. The file containing the specification should be
called output_spec.json. The expected JSON value is an array of
dictionaries. Each dictionary should have 2 keys:

48 - "tensor_spec, followed by the TensorSpec description of the
output; and
- "logging_name", a string indicating the name to use when
logging the output values.

Example:
[
  {
    "logging_name" : "some_name",
    "tensor_spec" : {
      "name" : "model_name",
      "port" : 0,
      "shape" : [2, 3],
      "type" : "float"
      }
  }
]

The first value must always correspond to the decision.)"));

static cl::opt<std::string> TFOutputSpecOverride(
    "ml-inliner-output-spec-override", cl::Hidden,
    cl::desc("Override the path to the output spec json file. See "
             "-ml-inliner-model-under-training documentation for the "
             "specification of that file."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  int64_t DefaultDecision = 0;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  int64_t AdvisedDecision = 0;
  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true; otherwise, it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before.
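  /// (For example, with illustrative numbers: if the caller's size estimate
  /// goes from 100 to 110 while a callee estimated at 30 is deleted by
  /// inlining, the reward is 110 - (100 + 30) = -20, a net size reduction.)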
  int64_t Reward = 0;
};

/// Collect data we may use for training a model.
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

private:
  StringRef LogFileName;
  const ModelUnderTrainingRunner *const MUTR;
  std::unique_ptr<Logger> L;
  BitVector Effects;
  /// Set these two clearly out-of-bounds, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is only concerned with producing training data
/// ("logs"). This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster when started from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous training
/// iteration. Potentially, that model internally applies some small random
/// perturbation to its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model, compile it in 'release' mode, validate
/// it, then potentially discard it, it's easier to just pass the model to the
/// compiler, albeit compilation would be slower, as a one-off. Once the model
/// behaves satisfactorily, it can be compiled AOT in release mode for
/// efficiency. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
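///
/// A sketch of how these modes are typically selected from the command line
/// (the -enable-ml-inliner option is defined elsewhere, in InlineAdvisor.cpp;
/// the pass pipeline shown is illustrative):
///
///   1) opt -passes=scc-oz-module-inliner -enable-ml-inliner=development
///        -training-log=<path> ...
///   2) ... -enable-ml-inliner=development -training-log=<path>
///        -ml-inliner-model-under-training=<saved_model_dir> ...
///   3) ... -enable-ml-inliner=development
///        -ml-inliner-model-under-training=<saved_model_dir> ...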
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice,
      std::unique_ptr<TrainingLogger> Logger);

  size_t getTotalSizeEstimate();

  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  void resetNativeSize(Function *F) {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<InlineSizeEstimatorAnalysis>();
    FAM.invalidate(*F, PA);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  std::optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  const std::optional<int32_t> InitialNativeSize;
  std::optional<int32_t> CurrentNativeSize;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger,
                        std::optional<size_t> CallerSizeEstimateBefore,
                        std::optional<size_t> CalleeSizeEstimateBefore,
                        bool DefaultDecision, bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
                            *CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    } else {
      log(NoReward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const std::optional<size_t> CallerSizeEstimateBefore;
  const std::optional<size_t> CalleeSizeEstimateBefore;
  const int64_t DefaultDecision;
  const int64_t Mandatory;
};

static const std::vector<TensorSpec> TrainingOnlyFeatures{
    TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
    TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};

static const std::vector<TensorSpec> getInputFeatures() {
  std::vector<TensorSpec> InputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(TensorSpec::createSpec<int64_t>(
        TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape()));
  append_range(InputSpecs, TrainingOnlyFeatures);
  return InputSpecs;
}
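
// For illustration: with the default TFFeedPrefix of "action_", a FeatureMap
// entry such as "callsite_height" (an assumed example of an inliner feature)
// yields an input tensor named "action_callsite_height", alongside the
// training-only feeds "action_discount", "action_reward" and
// "action_step_type" declared above.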

} // namespace

TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR)
    : LogFileName(LogFileName), MUTR(MUTR) {
  // The first output is the inlining decision.
  std::vector<TensorSpec> FT(FeatureMap.begin(), FeatureMap.end());

  if (MUTR)
    append_range(FT, MUTR->extraOutputsForLoggingSpecs());

  DefaultDecisionPos = FT.size();
  FT.push_back(DefaultDecisionSpec);

  DecisionPos = FT.size();
  FT.push_back(InlineDecisionSpec);
  std::error_code EC;
  auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
  if (EC)
    dbgs() << (EC.message() + ":" + TrainingLog);

  L = std::make_unique<Logger>(
      std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
      InlineSizeEstimatorAnalysis::isEvaluatorRequested());
  L->switchContext("");
}
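
// For reference, a sketch of the log schema implied by the constructor above,
// with N input features and K extra model outputs:
//   [0 .. N-1]    input features, in FeatureMap order
//   [N .. N+K-1]  extra outputs for logging (only when MUTR is provided)
//   [N+K]         DefaultDecisionPos, the default policy's decision
//   [N+K+1]       DecisionPos, the advised decision
// plus an int64 reward, logged only when the size estimator is requested.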

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  L->startObservation();
  size_t CurrentFeature = 0;
  for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature)
    L->logTensorValue(CurrentFeature,
                      reinterpret_cast<const char *>(
                          ModelRunner.getTensorUntyped(CurrentFeature)));

  if (MUTR)
    for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) {
      const char *RawData =
          reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I));
      L->logTensorValue(CurrentFeature, RawData);
      ++CurrentFeature;
    }

  assert(CurrentFeature == DefaultDecisionPos);
  L->logTensorValue(DefaultDecisionPos,
                    reinterpret_cast<const char *>(&Event.DefaultDecision));
  L->logTensorValue(DecisionPos,
                    reinterpret_cast<const char *>(&Event.AdvisedDecision));
  L->endObservation();
  if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    L->logReward(Event.Reward);

  // For debugging / later use.
  Effects.push_back(Event.Effect);
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner), GetDefaultAdvice),
      IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
      Logger(std::move(Logger)),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

std::optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return std::nullopt;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation =
      IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
                       : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return 0;
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    Ret += *getNativeSizeEstimate(F);
  }
  return Ret;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  std::unique_ptr<MLModelRunner> Runner;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures()));
  else
    Runner = ModelUnderTrainingRunner::createAndEnsureValid(
        Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(),
        TFOutputSpecOverride);
  if (!Runner)
    return nullptr;
  std::unique_ptr<TrainingLogger> Logger;
  if (!TrainingLog.empty())
    Logger = std::make_unique<TrainingLogger>(
        TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get()));

  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger));
}
#endif // defined(LLVM_HAVE_TFLITE)