//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner  --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using Tensorflow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc(R"(Path to SavedModel from the previous training iteration.
The directory is also expected to contain a JSON specification of the
outputs expected to be logged, where the first entry must be the
inlining decision. The file containing the specification should be
called output_spec.json. The expected JSON value is an array of
dictionaries. Each dictionary should have 2 keys:

42 - "tensor_spec, followed by the TensorSpec description of the
output; and
- "logging_name", a string indicating the name to use when
logging the output values.

Example:
[
  {
    "logging_name" : "some_name",
    "tensor_spec" : {
      "name" : "model_name",
      "port" : 0,
      "shape" : [2, 3],
      "type" : "float"
      }
  }
]

The first value must always correspond to the decision.)"));

static cl::opt<std::string> TFOutputSpecOverride(
    "ml-inliner-output-spec-override", cl::Hidden,
    cl::desc("Override the path to the output spec json file. See "
             "-ml-inliner-model-under-training documentation for the "
             "specification of that file."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  int64_t DefaultDecision = 0;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  int64_t AdvisedDecision = 0;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true, otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// Tensorflow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
class ModelUnderTrainingRunner;
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print();

private:
  StringRef LogFileName;
  const ModelUnderTrainingRunner *const MUTR;
  std::unique_ptr<Logger> L;
  std::vector<bool> Effects;
  /// There's at least one output. We'll set this to a different value if MUTR
  /// is available.
  size_t OutputCount = 1;
  /// Set these 2 clearly OOB, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is concerned with producing training data ("logs").
/// This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end to end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model and compile it in 'release' mode,
/// validate it, then potentially discard it, it's easier to just pass the model
/// to the compiler, albeit compilation would be slower, as a one-off. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
      std::unique_ptr<TrainingLogger> Logger);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  void resetNativeSize(Function *F) {
    FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  Optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  std::function<bool(CallBase &)> GetDefaultAdvice;
  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  const Optional<int32_t> InitialNativeSize;
  Optional<int32_t> CurrentNativeSize;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger,
                        Optional<size_t> CallerSizeEstimateBefore,
                        Optional<size_t> CalleeSizeEstimateBefore,
                        bool DefaultDecision, bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
                            *CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
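      // The callee has been deleted, so only the caller's re-estimated size
      // contributes to the post-inlining native size.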
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
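    // Mandatory inlinings don't reflect a policy decision, so don't log them.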
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const Optional<size_t> CallerSizeEstimateBefore;
  const Optional<size_t> CalleeSizeEstimateBefore;
  const int64_t DefaultDecision;
  const int64_t Mandatory;
};

/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, but never ask it to 'run'.
class NoInferenceModelRunner : public MLModelRunner {
public:
  NoInferenceModelRunner(LLVMContext &Ctx)
      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
  void setFeature(FeatureIndex Index, int64_t Value) override {
    Features[static_cast<int>(Index)] = Value;
  }

  int64_t getFeature(int Index) const override { return Features[Index]; }
  bool run() override {
    llvm_unreachable("We shouldn't call run on this model runner.");
  }

private:
  InlineFeatures Features;
};

/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
/// to dynamically load and evaluate a TF SavedModel
/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
/// sacrificed for ease of use while training.
class ModelUnderTrainingRunner final : public MLModelRunner {
public:
  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);

  bool run() override;

  // Disallows copy and assign.
  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
  ModelUnderTrainingRunner &
  operator=(const ModelUnderTrainingRunner &) = delete;

  void setFeature(FeatureIndex Index, int64_t Value) override;
  int64_t getFeature(int Index) const override;
  bool isValid() const { return !!Evaluator; }

  const std::vector<LoggedFeatureSpec> &outputLoggedFeatureSpecs() const {
    return OutputSpecs;
  }

  const Optional<TFModelEvaluator::EvaluationResult> &
  lastEvaluationResult() const {
    return LastEvaluationResult;
  }

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;
  std::vector<LoggedFeatureSpec> OutputSpecs;
  Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult;

  // The training framework needs some additional features.
  const std::vector<TensorSpec> TrainingOnlyFeatures{
      TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
      TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
};
} // namespace

TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR)
    : LogFileName(LogFileName), MUTR(MUTR) {
  // The first output is the inlining decision.
  if (MUTR)
    OutputCount = MUTR->outputLoggedFeatureSpecs().size();
  std::vector<LoggedFeatureSpec> FT;

  for (size_t I = 0; I < NumberOfFeatures; ++I)
    FT.push_back(
        {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
  if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
    append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs()));

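  // The default policy's decision and the advised decision are logged last,
  // after the features and any extra model outputs.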
  DefaultDecisionPos = FT.size();
  FT.push_back(
      {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});

  DecisionPos = FT.size();
  FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});

  L = std::make_unique<Logger>(
      FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
      InlineSizeEstimatorAnalysis::isEvaluatorRequested());
}

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  size_t CurrentFeature = 0;
  for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
    int64_t F = ModelRunner.getFeature(CurrentFeature);
    L->logTensorValue(CurrentFeature, &F);
  }

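  // Log any extra model outputs (positions 1 and up), copying them raw from
  // the last evaluation result.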
  for (size_t I = 1; I < OutputCount; ++I) {
    const auto &Result = *MUTR->lastEvaluationResult();
    auto &Spec = MUTR->outputLoggedFeatureSpecs()[I].Spec;
    const char *RawData =
        reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
    L->logTensorValue(CurrentFeature, RawData,
                      Spec.getElementCount() * Spec.getElementByteSize());
    ++CurrentFeature;
  }

  assert(CurrentFeature == DefaultDecisionPos);
  L->logTensorValue(DefaultDecisionPos, &Event.DefaultDecision);
  L->logTensorValue(DecisionPos, &Event.AdvisedDecision);
  if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    L->logReward(Event.Reward);

  // For debugging / later use
  Effects.push_back(Event.Effect);
}

void TrainingLogger::print() {
  std::error_code EC;
  raw_fd_ostream OutFile(LogFileName, EC);
  L->print(OutFile);
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      Logger(std::move(Logger)),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (isLogging())
    Logger->print();
}

Optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return None;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory*/ true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
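  // Pure inference with no logging: the base advisor already does exactly
  // what we need.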
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

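  // We are logging, so always compute the default policy's decision; use the
  // model's recommendation only if a model is actually loaded.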
  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return 0;
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += *getNativeSizeEstimate(F);
  }
  return Ret;
}

ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
                                                   const std::string &ModelPath)
    : MLModelRunner(Ctx) {
  std::vector<TensorSpec> InputSpecs;
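  // Feed the regular inlining features, plus the extra tensors the training
  // framework expects (TrainingOnlyFeatures).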
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(
        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
  append_range(InputSpecs, TrainingOnlyFeatures);
  if (auto MaybeOutSpecs =
          loadOutputSpecs(Ctx, DecisionName, ModelPath, TFOutputSpecOverride))
    OutputSpecs = std::move(*MaybeOutSpecs);
  else
    return;

  Evaluator = std::make_unique<TFModelEvaluator>(
      ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; },
      OutputSpecs.size());
  if (!Evaluator || !Evaluator->isValid()) {
    Ctx.emitError("Failed to create inliner saved model evaluator");
    Evaluator.reset();
    return;
  }
}

bool ModelUnderTrainingRunner::run() {
  LastEvaluationResult = Evaluator->evaluate();
  if (!LastEvaluationResult.hasValue()) {
    Ctx.emitError("Error evaluating model.");
    return false;
  }
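  // The first output tensor holds the inlining decision.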
  int64_t Decision = *LastEvaluationResult->getTensorValue<int64_t>(0);
  return static_cast<bool>(Decision);
}

int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
  return *Evaluator->getInput<int64_t>(Index);
}

void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
  size_t NumericIndex = static_cast<size_t>(Index);
  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  std::unique_ptr<MLModelRunner> Runner;
  ModelUnderTrainingRunner *MUTRPtr = nullptr;
  bool IsDoingInference = false;
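  // Without a model under training, use a runner that merely stores feature
  // values for the logger and is never asked to run.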
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx));
  else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    if (!MUTR || !MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    IsDoingInference = true;
    MUTRPtr = MUTR.get();
    Runner = std::move(MUTR);
  }
  std::unique_ptr<TrainingLogger> Logger;
  if (!TrainingLog.empty())
    Logger = std::make_unique<TrainingLogger>(TrainingLog, MUTRPtr);

  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference,
      std::move(Logger));
}
#endif // defined(LLVM_HAVE_TF_API)