1 //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a model runner using Tensorflow C APIs, allowing the 10 // loading of a model from a command line option. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "llvm/Config/config.h" 14 #include "llvm/Support/Casting.h" 15 #if defined(LLVM_HAVE_TF_API) 16 17 #include "llvm/ADT/BitVector.h" 18 #include "llvm/Analysis/CallGraph.h" 19 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" 20 #include "llvm/Analysis/MLInlineAdvisor.h" 21 #include "llvm/Analysis/ModelUnderTrainingRunner.h" 22 #include "llvm/Analysis/NoInferenceModelRunner.h" 23 #include "llvm/Analysis/Utils/TFUtils.h" 24 #include "llvm/IR/LLVMContext.h" 25 #include "llvm/Support/CommandLine.h" 26 #include "llvm/Support/ManagedStatic.h" 27 28 #include <vector> 29 30 using namespace llvm; 31 32 static cl::opt<std::string> TrainingLog( 33 "training-log", cl::Hidden, 34 cl::desc("Path where the development - mode inlining log is saved.")); 35 36 static cl::opt<std::string> TFModelUnderTrainingPath( 37 "ml-inliner-model-under-training", cl::Hidden, 38 cl::desc(R"(Path to SavedModel from the previous training iteration. 39 The directory is also expected to contain a JSON specification of the 40 outputs expected to be logged, where the first entry must be the 41 inlining decision. The file containing the specification should be 42 called output_spec.json. The expected JSON value is an array of 43 dictionaries. 
Each dictionary should have 2 keys: 44 45 - "tensor_spec, followed by the TensorSpec description of the 46 output; and 47 - "logging_name", a string indicating the name to use when 48 logging the output values. 49 50 Example: 51 [ 52 { 53 "logging_name" : "some_name", 54 "tensor_spec" : { 55 "name" : "model_name", 56 "port" : 0, 57 "shape" : [2, 3], 58 "type" : "float" 59 } 60 } 61 ] 62 63 The first value must always correspond to the decision.)")); 64 65 static cl::opt<std::string> TFOutputSpecOverride( 66 "ml-inliner-output-spec-override", cl::Hidden, 67 cl::desc("Override the path to the output spec json file. See " 68 "-ml-inliner-model-under-training documentation for the " 69 "specification of that file.")); 70 71 static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix", 72 cl::Hidden, cl::init("action_"), 73 cl::desc("Prefix for feature names.")); 74 75 namespace { 76 /// An InlineEvent, used by TrainingLogger. 77 struct InlineEvent { 78 /// What the default policy's decision would have been. 79 int64_t DefaultDecision = 0; 80 81 /// What we advised. When training off the default policy, this is the same as 82 /// DefaultDecision. 83 int64_t AdvisedDecision = 0; 84 85 /// What actually happened. This would be 'false' in the case of an inline 86 /// error, even if AdvisedDecision were true, otherwise it agrees with 87 /// AdvisedDecision. 88 bool Effect = false; 89 90 /// What the change in size was: size_after - size_before 91 int64_t Reward = 0; 92 }; 93 94 /// Collect data we may use for training a model, and write it as a textual 95 /// Tensorflow SequenceExample 96 /// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample) 97 /// protobuf (https://developers.google.com/protocol-buffers). 98 /// Because this is a protobuf, we cannot just stream the events as they come. 99 /// Internally, TrainingLogger stores data in column-major format, because that 100 /// lines up with how TF SequenceExample represents it. 
class TrainingLogger final {
public:
  /// \p LogFileName is where print() writes the accumulated log; \p MUTR, when
  /// non-null, is the model under training whose extra outputs are also logged.
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print();

private:
  StringRef LogFileName;
  const ModelUnderTrainingRunner *const MUTR;
  std::unique_ptr<Logger> L;
  // One bit per logged event, recording Event.Effect (see logInlineEvent).
  BitVector Effects;
  /// There's at least one output. We'll set this to a different value if MUTR
  /// is available.
  size_t OutputCount = 1;
  /// Set these 2 clearly OOB, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler, this facility is concerned with producing training data ("logs").
/// This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end to end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model and compile it in 'release' mode,
/// validate it, then potentially discard it, it's easier to just pass the model
/// to the compiler, albeit compilation would be slower, as a one-off. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice,
      std::unique_ptr<TrainingLogger> Logger);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  /// Apply a size delta to the running module-size estimate.
  /// NOTE(review): dereferences CurrentNativeSize unconditionally — presumably
  /// only called when the size estimator is available; confirm with callers.
  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  /// Invalidate the cached native size estimate for \p F (everything else in
  /// the function analysis cache is preserved).
  void resetNativeSize(Function *F) {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<InlineSizeEstimatorAnalysis>();
    FAM.invalidate(*F, PA);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  /// Returns None when the size estimator was not requested; 0 (after emitting
  /// a context error) when the estimator is requested but unavailable.
  Optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  std::function<bool(CallBase &)> GetDefaultAdvice;
  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  const Optional<int32_t> InitialNativeSize;
  Optional<int32_t> CurrentNativeSize;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger,
                        Optional<size_t> CallerSizeEstimateBefore,
                        Optional<size_t> CalleeSizeEstimateBefore,
                        bool DefaultDecision, bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    // In development mode the advisor is always a
    // DevelopmentModeMLInlineAdvisor, so the unchecked downcast is safe.
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  /// Inlining happened and the callee is still referenced elsewhere. The
  /// reward is the size delta: (caller after) - (caller + callee before).
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    // Sentinel reward, logged as-is when no size estimator is available.
    int Reward = std::numeric_limits<int>::max();
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      // The callee still exists, so its size still counts toward the total.
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
                            *CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  /// Inlining happened and the callee was deleted, so only the caller's new
  /// size enters the reward computation.
  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    } else {
      log(NoReward, /*Success=*/true);
    }
  }

  /// Inlining was attempted but failed.
  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  /// Inlining was not attempted at all.
  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  /// Forward one event to the TrainingLogger. Mandatory (always-inline)
  /// decisions are trivial and intentionally not logged.
  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const Optional<size_t> CallerSizeEstimateBefore;
  const Optional<size_t> CalleeSizeEstimateBefore;
  const int64_t DefaultDecision;
  const int64_t Mandatory;
};

// Extra feeds the model under training expects beyond the regular inlining
// features (RL bookkeeping: default decision, discount, reward, step type).
static const std::vector<TensorSpec> TrainingOnlyFeatures{
    TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
    TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};

// The full input tensor set: each inlining feature (with the configured feed
// prefix) followed by the training-only feeds above.
static const std::vector<TensorSpec> getInputFeatures() {
  std::vector<TensorSpec> InputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(
        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
  append_range(InputSpecs, TrainingOnlyFeatures);
  return InputSpecs;
}

} // namespace

TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR)
    : LogFileName(LogFileName), MUTR(MUTR) {
  // The first output is the inlining decision.
  if (MUTR)
    OutputCount = MUTR->outputLoggedFeatureSpecs().size();
  std::vector<LoggedFeatureSpec> FT;

  // Column layout: the inlining features first...
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    FT.push_back(
        {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
  // ...then any extra model outputs (skipping the first, which is the
  // decision itself and is logged separately below)...
  if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
    append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs()));

  // ...then the default policy's decision and the advised decision.
  DefaultDecisionPos = FT.size();
  FT.push_back(
      {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});

  DecisionPos = FT.size();
  FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});

  // Rewards are only logged when the size estimator is available.
  L = std::make_unique<Logger>(
      FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
      InlineSizeEstimatorAnalysis::isEvaluatorRequested());
}

/// Log one inlining event. Columns must be filled in the exact order they were
/// registered in the constructor.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  size_t CurrentFeature = 0;
  for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
    int64_t F = *ModelRunner.getTensor<int64_t>(CurrentFeature);
    L->logInt64Value(CurrentFeature, &F);
  }

  // Extra model outputs; OutputCount > 1 implies MUTR is non-null (set in the
  // constructor). Output 0 is the decision and is logged below instead.
  for (size_t I = 1; I < OutputCount; ++I) {
    const auto &Result = *MUTR->lastEvaluationResult();
    const char *RawData =
        reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
    L->logSpecifiedTensorValue(CurrentFeature, RawData);
    ++CurrentFeature;
  }

  assert(CurrentFeature == DefaultDecisionPos);
  L->logInt64Value(DefaultDecisionPos, &Event.DefaultDecision);
  L->logInt64Value(DecisionPos, &Event.AdvisedDecision);
  if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    L->logInt64Reward(Event.Reward);

  // For debugging / later use
  Effects.push_back(Event.Effect);
}

void TrainingLogger::print() {
  std::error_code EC;
  raw_fd_ostream OutFile(LogFileName, EC);
  L->flush(OutFile);
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice),
      IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
      Logger(std::move(Logger)),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  // Flush the accumulated log to the training-log file, if logging.
  if (isLogging())
    Logger->print();
}

Optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return None;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    // Estimator requested but unavailable: report the error and return 0 so
    // callers still get a value.
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  // Mandatory advice is always "inline"; LoggingMLInlineAdvice::log skips
  // mandatory events, so this produces no training data.
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory*/ true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  // Inference without logging: defer entirely to the base advisor.
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  // When logging without a model under training, record the default policy's
  // decision as the recommendation.
  auto Recommendation =
      IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
                       : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

/// Sum of the native size estimates of all defined functions in the module;
/// 0 when the size estimator was not requested.
size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return 0;
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    Ret += *getNativeSizeEstimate(F);
  }
  return Ret;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  std::unique_ptr<MLModelRunner> Runner;
  // No model under training: features are only collected (no inference).
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures()));
  else
    Runner = ModelUnderTrainingRunner::createAndEnsureValid(
        Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(),
        TFOutputSpecOverride);
  if (!Runner)
    return nullptr;
  std::unique_ptr<TrainingLogger> Logger;
  if (!TrainingLog.empty())
    Logger = std::make_unique<TrainingLogger>(
        TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get()));

  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger));
}
#endif // defined(LLVM_HAVE_TF_API)