//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using Tensorflow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <limits>
#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc(R"(Path to SavedModel from the previous training iteration.
The directory is also expected to contain a JSON specification of the
outputs expected to be logged, where the first entry must be the
inlining decision. The file containing the specification should be
called output_spec.json. The expected JSON value is an array of
dictionaries. Each dictionary should have 2 keys:

- "tensor_spec", followed by the TensorSpec description of the
output; and
- "logging_name", a string indicating the name to use when
logging the output values.

Example:
[
  {
    "logging_name" : "some_name",
    "tensor_spec" : {
      "name" : "model_name",
      "port" : 0,
      "shape" : [2, 3],
      "type" : "float"
    }
  }
]

The first value must always correspond to the decision.)"));

static cl::opt<std::string> TFOutputSpecOverride(
    "ml-inliner-output-spec-override", cl::Hidden,
    cl::desc("Override the path to the output spec json file. See "
             "-ml-inliner-model-under-training documentation for the "
             "specification of that file."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  int64_t DefaultDecision = 0;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  int64_t AdvisedDecision = 0;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true; otherwise, it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before.
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// Tensorflow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because
/// that lines up with how TF SequenceExample represents it.
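///
/// As a rough illustration only (simplified, not the exact schema this file
/// produces), a single logged tensor with two recorded events would appear
/// in the textual SequenceExample along these lines:
///
///   feature_lists: {
///     feature_list: {
///       key: "inlining_decision"
///       value: { feature: { int64_list: { value: [1] } }
///                feature: { int64_list: { value: [0] } } }
///     }
///   }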
class ModelUnderTrainingRunner;
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print();

private:
  StringRef LogFileName;
  const ModelUnderTrainingRunner *const MUTR;
  std::unique_ptr<Logger> L;
  std::vector<bool> Effects;
  /// There's at least one output. We'll set this to a different value if MUTR
  /// is available.
  size_t OutputCount = 1;
  /// Set these 2 clearly OOB, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is only concerned with producing training data
/// ("logs"). This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster when starting from a
/// reasonable policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training iteration. That model may internally apply some small random
/// perturbation to its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation: the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than
/// needing to take the release candidate model, compile it in 'release'
/// mode, validate it, then potentially discard it, it's easier to just pass
/// the model to the compiler as a one-off, even though compilation will be
/// slower. Once the model behaves satisfactorily, it can be compiled AOT, for
/// efficiency, in release mode. The expectation is that a well-trained model
/// provides a good policy over a sufficiently diverse codebase, over many
/// changes (i.e. training happens seldom).
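///
/// For illustration, the modes roughly correspond to invocations like the
/// following (using the flags defined in this file together with the generic
/// -enable-ml-inliner option; exact pipelines and spellings may vary):
///
///   opt -passes=scc-oz-module-inliner -enable-ml-inliner=development \
///       -training-log=<log file> ...                               (mode 1)
///   ... -enable-ml-inliner=development \
///       -ml-inliner-model-under-training=<saved model dir> \
///       -training-log=<log file> ...                               (mode 2)
///   ... -enable-ml-inliner=development \
///       -ml-inliner-model-under-training=<saved model dir> ...     (mode 3)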
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
      std::unique_ptr<TrainingLogger> Logger);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  void resetNativeSize(Function *F) {
    FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  Optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  std::function<bool(CallBase &)> GetDefaultAdvice;
  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  const Optional<int32_t> InitialNativeSize;
  Optional<int32_t> CurrentNativeSize;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger,
                        Optional<size_t> CallerSizeEstimateBefore,
                        Optional<size_t> CalleeSizeEstimateBefore,
                        bool DefaultDecision, bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
                            *CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    }
  }
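
  // Illustrative arithmetic for the reward computed above (made-up numbers):
  // with *CallerSizeEstimateBefore == 100, *CalleeSizeEstimateBefore == 40,
  // and a post-inlining caller estimate of 120, the reward is
  // 120 - (100 + 40) == -20, i.e. the estimated native size shrank by 20.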

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const Optional<size_t> CallerSizeEstimateBefore;
  const Optional<size_t> CalleeSizeEstimateBefore;
  const int64_t DefaultDecision;
  const int64_t Mandatory;
};

/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, but never ask it to 'run'.
class NoInferenceModelRunner : public MLModelRunner {
public:
  NoInferenceModelRunner(LLVMContext &Ctx)
      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
  void setFeature(FeatureIndex Index, int64_t Value) override {
    Features[static_cast<int>(Index)] = Value;
  }

  int64_t getFeature(int Index) const override { return Features[Index]; }
  bool run() override {
    llvm_unreachable("We shouldn't call run on this model runner.");
  }

private:
  InlineFeatures Features;
};

/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
/// to dynamically load and evaluate a TF SavedModel
/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
/// sacrificed for ease of use while training.
class ModelUnderTrainingRunner final : public MLModelRunner {
public:
  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);

  bool run() override;

  // Disallows copy and assign.
  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
  ModelUnderTrainingRunner &
  operator=(const ModelUnderTrainingRunner &) = delete;

  void setFeature(FeatureIndex Index, int64_t Value) override;
  int64_t getFeature(int Index) const override;
  bool isValid() const { return !!Evaluator; }

  const std::vector<LoggedFeatureSpec> &outputLoggedFeatureSpecs() const {
    return OutputSpecs;
  }

  const Optional<TFModelEvaluator::EvaluationResult> &
  lastEvaluationResult() const {
    return LastEvaluationResult;
  }

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;
  std::vector<LoggedFeatureSpec> OutputSpecs;
  Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult;

  // The training framework needs some additional features.
  const std::vector<TensorSpec> TrainingOnlyFeatures{
      TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
      TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
};
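
// Note (illustrative, assuming the default "action_" feed prefix): the
// SavedModel is expected to expose one feed per feature, named by prepending
// TFFeedPrefix to the feature name (for instance,
// action_callee_basic_block_count), plus the training-only feeds declared in
// TrainingOnlyFeatures above (action_inlining_default, action_discount,
// action_reward, action_step_type).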
} // namespace

TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR)
    : LogFileName(LogFileName), MUTR(MUTR) {
  // The first output is the inlining decision.
  if (MUTR)
    OutputCount = MUTR->outputLoggedFeatureSpecs().size();
  std::vector<LoggedFeatureSpec> FT;

  for (size_t I = 0; I < NumberOfFeatures; ++I)
    FT.push_back(
        {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
  if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
    append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs()));

  DefaultDecisionPos = FT.size();
  FT.push_back(
      {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});

  DecisionPos = FT.size();
  FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});

  L = std::make_unique<Logger>(
      FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
      InlineSizeEstimatorAnalysis::isEvaluatorRequested());
}

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  size_t CurrentFeature = 0;
  for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
    int64_t F = ModelRunner.getFeature(CurrentFeature);
    L->logTensorValue(CurrentFeature, &F);
  }

  for (size_t I = 1; I < OutputCount; ++I) {
    const auto &Result = *MUTR->lastEvaluationResult();
    auto &Spec = MUTR->outputLoggedFeatureSpecs()[I].Spec;
    const char *RawData =
        reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
    L->logTensorValue(CurrentFeature, RawData,
                      Spec.getElementCount() * Spec.getElementByteSize());
    ++CurrentFeature;
  }

  assert(CurrentFeature == DefaultDecisionPos);
  L->logTensorValue(DefaultDecisionPos, &Event.DefaultDecision);
  L->logTensorValue(DecisionPos, &Event.AdvisedDecision);
  if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    L->logReward(Event.Reward);

  // For debugging / later use.
  Effects.push_back(Event.Effect);
}

void TrainingLogger::print() {
  std::error_code EC;
  raw_fd_ostream OutFile(LogFileName, EC);
  L->print(OutFile);
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      Logger(std::move(Logger)),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (isLogging())
    Logger->print();
}

Optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return None;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory=*/true);
}
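
// Behavior summary for getAdviceFromModel below, mapped to the modes
// documented on DevelopmentModeMLInlineAdvisor (derived from the code; the
// constructor asserts that at least one of the two flags holds):
//   IsDoingInference && !isLogging(): plain model advice          (mode 3)
//   IsDoingInference &&  isLogging(): model advice, logged        (mode 2)
//  !IsDoingInference &&  isLogging(): default advice, logged      (mode 1)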
std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return 0;
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += *getNativeSizeEstimate(F);
  }
  return Ret;
}

ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
                                                   const std::string &ModelPath)
    : MLModelRunner(Ctx) {
  std::vector<TensorSpec> InputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(
        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
  append_range(InputSpecs, TrainingOnlyFeatures);
  if (auto MaybeOutSpecs =
          loadOutputSpecs(Ctx, DecisionName, ModelPath, TFOutputSpecOverride))
    OutputSpecs = std::move(*MaybeOutSpecs);
  else
    return;

  Evaluator = std::make_unique<TFModelEvaluator>(
      ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; },
      OutputSpecs.size());
  if (!Evaluator || !Evaluator->isValid()) {
    Ctx.emitError("Failed to create inliner saved model evaluator");
    Evaluator.reset();
    return;
  }
}

bool ModelUnderTrainingRunner::run() {
  LastEvaluationResult = Evaluator->evaluate();
  if (!LastEvaluationResult.hasValue()) {
    Ctx.emitError("Error evaluating model.");
    return false;
  }
  int64_t Decision = *LastEvaluationResult->getTensorValue<int64_t>(0);
  return static_cast<bool>(Decision);
}

int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
  return *Evaluator->getInput<int64_t>(Index);
}

void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
  size_t NumericIndex = static_cast<size_t>(Index);
  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  std::unique_ptr<MLModelRunner> Runner;
  ModelUnderTrainingRunner *MUTRPtr = nullptr;
  bool IsDoingInference = false;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx));
  else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    if (!MUTR || !MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    IsDoingInference = true;
    MUTRPtr = MUTR.get();
    Runner = std::move(MUTR);
  }
  std::unique_ptr<TrainingLogger> Logger;
  if (!TrainingLog.empty())
    Logger = std::make_unique<TrainingLogger>(TrainingLog, MUTRPtr);

  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference,
      std::move(Logger));
}
#endif // defined(LLVM_HAVE_TF_API)