//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using Tensorflow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <limits>
#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc(R"(Path to SavedModel from the previous training iteration.
The directory is also expected to contain a JSON specification of the
outputs expected to be logged, where the first entry must be the
inlining decision. The file containing the specification should be
called output_spec.json. The expected JSON value is an array of
dictionaries. Each dictionary should have 2 keys:

- "tensor_spec", followed by the TensorSpec description of the
  output; and
- "logging_name", a string indicating the name to use when
  logging the output values.

Example:
[
  {
    "logging_name" : "some_name",
    "tensor_spec" : {
      "name" : "model_name",
      "port" : 0,
      "shape" : [2, 3],
      "type" : "float"
    }
  }
]

The first value must always correspond to the decision.)"));

static cl::opt<std::string> TFOutputSpecOverride(
    "ml-inliner-output-spec-override", cl::Hidden,
    cl::desc("Override the path to the output spec json file. See "
             "-ml-inliner-model-under-training documentation for the "
             "specification of that file."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  int64_t DefaultDecision = 0;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  int64_t AdvisedDecision = 0;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true, otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// Tensorflow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because
/// that lines up with how TF SequenceExample represents it.
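///
/// Schematically (a hand-written illustration, not literal logger output),
/// one logged column of the textual SequenceExample could look roughly like:
///
///   feature_lists: {
///     feature_list: {
///       key: "callee_basic_block_count"
///       value: {
///         feature: { int64_list: { value: [12] } }
///         feature: { int64_list: { value: [4] } }
///       }
///     }
///     ...
///   }
///
/// i.e. one feature_list per logged tensor, with one entry per inline event,
/// which is why the data is buffered per column rather than streamed.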
class ModelUnderTrainingRunner;
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print();

private:
  StringRef LogFileName;
  const ModelUnderTrainingRunner *const MUTR;
  std::unique_ptr<Logger> L;
  std::vector<bool> Effects;
  /// There's at least one output. We'll set this to a different value if MUTR
  /// is available.
  size_t OutputCount = 1;
  /// Set these 2 clearly OOB, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is concerned with producing training data ("logs").
/// This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model and compile it in 'release' mode,
/// validate it, then potentially discard it, it's easier to just pass the model
/// to the compiler, albeit compilation would be slower, as a one-off. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
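///
/// For illustration only (the exact driver flags depend on how the advisor is
/// enabled in the pass pipeline; -enable-ml-inliner is assumed to be the
/// switch defined elsewhere in LLVM, and the paths are placeholders):
///
///   mode 1:  opt -enable-ml-inliner=development \
///                -training-log=/tmp/default.log ...
///   mode 2:  opt -enable-ml-inliner=development \
///                -ml-inliner-model-under-training=/path/to/saved_model \
///                -training-log=/tmp/model.log ...
///   mode 3:  opt -enable-ml-inliner=development \
///                -ml-inliner-model-under-training=/path/to/saved_model ...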
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
      std::unique_ptr<TrainingLogger> Logger);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  void resetNativeSize(Function *F) {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<InlineSizeEstimatorAnalysis>();
    FAM.invalidate(*F, PA);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  Optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  std::function<bool(CallBase &)> GetDefaultAdvice;
  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  const Optional<int32_t> InitialNativeSize;
  Optional<int32_t> CurrentNativeSize;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger,
                        Optional<size_t> CallerSizeEstimateBefore,
                        Optional<size_t> CalleeSizeEstimateBefore,
                        bool DefaultDecision, bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
                            *CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    } else {
      log(NoReward, /*Success=*/true);
    }
  }
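
  // Worked example for the reward arithmetic above (hypothetical numbers): if
  // the caller was estimated at 100 native bytes and the callee at 40 before
  // inlining, and the caller is estimated at 130 after inlining with the
  // callee deleted, the logged reward is 130 - (100 + 40) = -10, i.e. a net
  // size decrease of 10 (reward is size_after - size_before). When the callee
  // survives, its pre-inlining estimate is added back into NativeSizeAfter
  // before the same subtraction.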
  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const Optional<size_t> CallerSizeEstimateBefore;
  const Optional<size_t> CalleeSizeEstimateBefore;
  const int64_t DefaultDecision;
  const int64_t Mandatory;
};

/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, but never ask it to 'run'.
class NoInferenceModelRunner : public MLModelRunner {
public:
  NoInferenceModelRunner(LLVMContext &Ctx)
      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
  void setFeature(FeatureIndex Index, int64_t Value) override {
    Features[static_cast<int>(Index)] = Value;
  }

  int64_t getFeature(int Index) const override { return Features[Index]; }
  bool run() override {
    llvm_unreachable("We shouldn't call run on this model runner.");
  }

private:
  InlineFeatures Features;
};

/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
/// to dynamically load and evaluate a TF SavedModel
/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
/// sacrificed for ease of use while training.
class ModelUnderTrainingRunner final : public MLModelRunner {
public:
  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);

  bool run() override;

  // Disallows copy and assign.
  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
  ModelUnderTrainingRunner &
  operator=(const ModelUnderTrainingRunner &) = delete;

  void setFeature(FeatureIndex Index, int64_t Value) override;
  int64_t getFeature(int Index) const override;
  bool isValid() const { return !!Evaluator; }

  const std::vector<LoggedFeatureSpec> &outputLoggedFeatureSpecs() const {
    return OutputSpecs;
  }

  const Optional<TFModelEvaluator::EvaluationResult> &
  lastEvaluationResult() const {
    return LastEvaluationResult;
  }

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;
  std::vector<LoggedFeatureSpec> OutputSpecs;
  Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult;

  // The training framework needs some additional features.
  const std::vector<TensorSpec> TrainingOnlyFeatures{
      TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
      TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
};
} // namespace
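
// Layout of the logged feature tensors assembled by the TrainingLogger
// constructor below (summarized from the code for readability):
//   [0, NumberOfFeatures)                  - the model's input features;
//   [NumberOfFeatures, DefaultDecisionPos) - any extra model outputs that
//                                            output_spec.json asks us to log
//                                            (only when a model under training
//                                            is provided; the first output,
//                                            the decision itself, is skipped);
//   DefaultDecisionPos                     - the default policy's decision;
//   DecisionPos                            - the advised decision.
// The reward, if requested, is logged separately via logInt64Reward.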
TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR)
    : LogFileName(LogFileName), MUTR(MUTR) {
  // The first output is the inlining decision.
  if (MUTR)
    OutputCount = MUTR->outputLoggedFeatureSpecs().size();
  std::vector<LoggedFeatureSpec> FT;

  for (size_t I = 0; I < NumberOfFeatures; ++I)
    FT.push_back(
        {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
  if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
    append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs()));

  DefaultDecisionPos = FT.size();
  FT.push_back(
      {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});

  DecisionPos = FT.size();
  FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});

  L = std::make_unique<Logger>(
      FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
      InlineSizeEstimatorAnalysis::isEvaluatorRequested());
}

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  size_t CurrentFeature = 0;
  for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
    int64_t F = ModelRunner.getFeature(CurrentFeature);
    L->logInt64Value(CurrentFeature, &F);
  }

  for (size_t I = 1; I < OutputCount; ++I) {
    const auto &Result = *MUTR->lastEvaluationResult();
    const char *RawData =
        reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
    L->logSpecifiedTensorValue(CurrentFeature, RawData);
    ++CurrentFeature;
  }

  assert(CurrentFeature == DefaultDecisionPos);
  L->logInt64Value(DefaultDecisionPos, &Event.DefaultDecision);
  L->logInt64Value(DecisionPos, &Event.AdvisedDecision);
  if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    L->logInt64Reward(Event.Reward);

  // For debugging / later use
  Effects.push_back(Event.Effect);
}

void TrainingLogger::print() {
  std::error_code EC;
  raw_fd_ostream OutFile(LogFileName, EC);
  L->flush(OutFile);
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      Logger(std::move(Logger)),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
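  // In other words, at least one of -ml-inliner-model-under-training or
  // -training-log must have been provided; see getDevelopmentModeAdvisor
  // below for how these flags map to IsDoingInference and Logger.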
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (isLogging())
    Logger->print();
}

Optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return None;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory*/ true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return 0;
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += *getNativeSizeEstimate(F);
  }
  return Ret;
}

ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
                                                   const std::string &ModelPath)
    : MLModelRunner(Ctx) {
  std::vector<TensorSpec> InputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(
        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
  append_range(InputSpecs, TrainingOnlyFeatures);
  if (auto MaybeOutSpecs =
          loadOutputSpecs(Ctx, DecisionName, ModelPath, TFOutputSpecOverride))
    OutputSpecs = std::move(*MaybeOutSpecs);
  else
    return;

  Evaluator = std::make_unique<TFModelEvaluator>(
      ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; },
      OutputSpecs.size());
  if (!Evaluator || !Evaluator->isValid()) {
    Ctx.emitError("Failed to create inliner saved model evaluator");
    Evaluator.reset();
    return;
  }
}

bool ModelUnderTrainingRunner::run() {
  LastEvaluationResult = Evaluator->evaluate();
  if (!LastEvaluationResult.hasValue()) {
    Ctx.emitError("Error evaluating model.");
    return false;
  }
  int64_t Decision = *LastEvaluationResult->getTensorValue<int64_t>(0);
  return static_cast<bool>(Decision);
}

int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
  return *Evaluator->getInput<int64_t>(Index);
}

void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
  size_t NumericIndex = static_cast<size_t>(Index);
  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
}
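
// Summary of how the factory below wires things up, depending on the flags
// provided (derived from the code, for quick reference):
//
//   -ml-inliner-model-under-training | -training-log | runner                   | logging
//   ---------------------------------+---------------+--------------------------+--------
//   unset                            | set           | NoInferenceModelRunner   | yes
//   set                              | set           | ModelUnderTrainingRunner | yes
//   set                              | unset         | ModelUnderTrainingRunner | no
//
// Providing neither flag is unsupported (see the assert in the
// DevelopmentModeMLInlineAdvisor constructor).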
std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  std::unique_ptr<MLModelRunner> Runner;
  ModelUnderTrainingRunner *MUTRPtr = nullptr;
  bool IsDoingInference = false;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx));
  else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    if (!MUTR || !MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    IsDoingInference = true;
    MUTRPtr = MUTR.get();
    Runner = std::move(MUTR);
  }
  std::unique_ptr<TrainingLogger> Logger;
  if (!TrainingLog.empty())
    Logger = std::make_unique<TrainingLogger>(TrainingLog, MUTRPtr);

  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference,
      std::move(Logger));
}
#endif // defined(LLVM_HAVE_TF_API)