1 //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a model runner using Tensorflow C APIs, allowing the 10 // loading of a model from a command line option. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "llvm/Config/config.h" 14 #if defined(LLVM_HAVE_TF_API) 15 16 #include "llvm/ADT/BitVector.h" 17 #include "llvm/Analysis/CallGraph.h" 18 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" 19 #include "llvm/Analysis/MLInlineAdvisor.h" 20 #include "llvm/Analysis/ModelUnderTrainingRunner.h" 21 #include "llvm/Analysis/NoInferenceModelRunner.h" 22 #include "llvm/Analysis/Utils/TFUtils.h" 23 #include "llvm/IR/LLVMContext.h" 24 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/ManagedStatic.h" 26 27 #include <vector> 28 29 using namespace llvm; 30 31 static cl::opt<std::string> TrainingLog( 32 "training-log", cl::Hidden, 33 cl::desc("Path where the development - mode inlining log is saved.")); 34 35 static cl::opt<std::string> TFModelUnderTrainingPath( 36 "ml-inliner-model-under-training", cl::Hidden, 37 cl::desc(R"(Path to SavedModel from the previous training iteration. 38 The directory is also expected to contain a JSON specification of the 39 outputs expected to be logged, where the first entry must be the 40 inlining decision. The file containing the specification should be 41 called output_spec.json. The expected JSON value is an array of 42 dictionaries. 
Each dictionary should have 2 keys: 43 44 - "tensor_spec, followed by the TensorSpec description of the 45 output; and 46 - "logging_name", a string indicating the name to use when 47 logging the output values. 48 49 Example: 50 [ 51 { 52 "logging_name" : "some_name", 53 "tensor_spec" : { 54 "name" : "model_name", 55 "port" : 0, 56 "shape" : [2, 3], 57 "type" : "float" 58 } 59 } 60 ] 61 62 The first value must always correspond to the decision.)")); 63 64 static cl::opt<std::string> TFOutputSpecOverride( 65 "ml-inliner-output-spec-override", cl::Hidden, 66 cl::desc("Override the path to the output spec json file. See " 67 "-ml-inliner-model-under-training documentation for the " 68 "specification of that file.")); 69 70 static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix", 71 cl::Hidden, cl::init("action_"), 72 cl::desc("Prefix for feature names.")); 73 74 namespace { 75 /// An InlineEvent, used by TrainingLogger. 76 struct InlineEvent { 77 /// What the default policy's decision would have been. 78 int64_t DefaultDecision = 0; 79 80 /// What we advised. When training off the default policy, this is the same as 81 /// DefaultDecision. 82 int64_t AdvisedDecision = 0; 83 84 /// What actually happened. This would be 'false' in the case of an inline 85 /// error, even if AdvisedDecision were true, otherwise it agrees with 86 /// AdvisedDecision. 87 bool Effect = false; 88 89 /// What the change in size was: size_after - size_before 90 int64_t Reward = 0; 91 }; 92 93 /// Collect data we may use for training a model, and write it as a textual 94 /// Tensorflow SequenceExample 95 /// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample) 96 /// protobuf (https://developers.google.com/protocol-buffers). 97 /// Because this is a protobuf, we cannot just stream the events as they come. 98 /// Internally, TrainingLogger stores data in column-major format, because that 99 /// lines up with how TF SequenceExample represents it. 
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print();

private:
  /// Where print() writes the accumulated log.
  StringRef LogFileName;
  /// Optional runner for a model under training; when non-null, its extra
  /// outputs (beyond the first, which is the decision) are logged too.
  const ModelUnderTrainingRunner *const MUTR;
  std::unique_ptr<Logger> L;
  /// One bit per logged event: whether inlining actually took effect.
  BitVector Effects;
  /// There's at least one output. We'll set this to a different value if MUTR
  /// is available.
  size_t OutputCount = 1;
  /// Set these 2 clearly OOB, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler, this facility is concerned with producing training data ("logs").
/// This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end to end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model and compile it in 'release' mode,
/// validate it, then potentially discard it, it's easier to just pass the model
/// to the compiler, albeit compilation would be slower, as a one-off. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice,
      std::unique_ptr<TrainingLogger> Logger);

  /// Sum of native size estimates over all defined functions in the module.
  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  /// Fold a per-call-site size delta into the running module-wide estimate.
  /// Only called while CurrentNativeSize is engaged (it is set in the ctor).
  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  /// Invalidate only the cached size estimate for \p F (all other analyses
  /// are marked preserved), forcing a re-estimate on next query.
  void resetNativeSize(Function *F) {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<InlineSizeEstimatorAnalysis>();
    FAM.invalidate(*F, PA);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  /// Native size estimate for \p F; None when the estimator isn't requested.
  Optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  std::function<bool(CallBase &)> GetDefaultAdvice;
  /// True iff the runner is a ModelUnderTrainingRunner (a model was supplied).
  const bool IsDoingInference;
  /// Null when no -training-log was requested.
  std::unique_ptr<TrainingLogger> Logger;

  // NOTE(review): these are int32_t while getNativeSizeEstimate /
  // getTotalSizeEstimate deal in size_t — large modules could overflow the
  // running total; consider widening to int64_t. TODO confirm.
  const Optional<int32_t> InitialNativeSize;
  Optional<int32_t> CurrentNativeSize;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
188 class LoggingMLInlineAdvice : public MLInlineAdvice { 189 public: 190 LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB, 191 OptimizationRemarkEmitter &ORE, bool Recommendation, 192 TrainingLogger &Logger, 193 Optional<size_t> CallerSizeEstimateBefore, 194 Optional<size_t> CalleeSizeEstimateBefore, 195 bool DefaultDecision, bool Mandatory = false) 196 : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger), 197 CallerSizeEstimateBefore(CallerSizeEstimateBefore), 198 CalleeSizeEstimateBefore(CalleeSizeEstimateBefore), 199 DefaultDecision(DefaultDecision), Mandatory(Mandatory) {} 200 201 virtual ~LoggingMLInlineAdvice() = default; 202 203 private: 204 DevelopmentModeMLInlineAdvisor *getAdvisor() const { 205 return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor); 206 } 207 void recordInliningImpl() override { 208 MLInlineAdvice::recordInliningImpl(); 209 getAdvisor()->resetNativeSize(Caller); 210 int Reward = std::numeric_limits<int>::max(); 211 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && 212 !getAdvisor()->isForcedToStop()) { 213 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) + 214 *CalleeSizeEstimateBefore; 215 Reward = NativeSizeAfter - 216 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); 217 getAdvisor()->updateNativeSizeEstimate(Reward); 218 } 219 log(Reward, /*Success=*/true); 220 } 221 222 void recordInliningWithCalleeDeletedImpl() override { 223 MLInlineAdvice::recordInliningWithCalleeDeletedImpl(); 224 getAdvisor()->resetNativeSize(Caller); 225 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && 226 !getAdvisor()->isForcedToStop()) { 227 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller); 228 int Reward = NativeSizeAfter - 229 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); 230 getAdvisor()->updateNativeSizeEstimate(Reward); 231 log(Reward, /*Success=*/true); 232 } else { 233 log(NoReward, /*Success=*/true); 234 } 235 } 236 237 void 
recordUnsuccessfulInliningImpl(const InlineResult &Result) override { 238 MLInlineAdvice::recordUnsuccessfulInliningImpl(Result); 239 log(NoReward, /*Success=*/false); 240 } 241 242 void recordUnattemptedInliningImpl() override { 243 MLInlineAdvice::recordUnattemptedInliningImpl(); 244 log(NoReward, /*Success=*/false); 245 } 246 247 void log(int64_t Reward, bool Success) { 248 if (Mandatory) 249 return; 250 InlineEvent Event; 251 Event.AdvisedDecision = isInliningRecommended(); 252 Event.DefaultDecision = DefaultDecision; 253 Event.Effect = Success; 254 Event.Reward = Reward; 255 Logger.logInlineEvent(Event, getAdvisor()->getModelRunner()); 256 } 257 258 static const int64_t NoReward = 0; 259 TrainingLogger &Logger; 260 const Optional<size_t> CallerSizeEstimateBefore; 261 const Optional<size_t> CalleeSizeEstimateBefore; 262 const int64_t DefaultDecision; 263 const int64_t Mandatory; 264 }; 265 266 static const std::vector<TensorSpec> TrainingOnlyFeatures{ 267 TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}), 268 TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}), 269 TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}), 270 TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})}; 271 272 static const std::vector<TensorSpec> getInputFeatures() { 273 std::vector<TensorSpec> InputSpecs; 274 for (size_t I = 0; I < NumberOfFeatures; ++I) 275 InputSpecs.push_back(TensorSpec::createSpec<int64_t>( 276 TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape())); 277 append_range(InputSpecs, TrainingOnlyFeatures); 278 return InputSpecs; 279 } 280 281 } // namespace 282 283 TrainingLogger::TrainingLogger(StringRef LogFileName, 284 const ModelUnderTrainingRunner *MUTR) 285 : LogFileName(LogFileName), MUTR(MUTR) { 286 // The first output is the inlining decision. 
287 if (MUTR) 288 OutputCount = MUTR->outputLoggedFeatureSpecs().size(); 289 std::vector<LoggedFeatureSpec> FT; 290 291 for (size_t I = 0; I < NumberOfFeatures; ++I) 292 FT.push_back({FeatureMap.at(I), None}); 293 if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1) 294 append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs())); 295 296 DefaultDecisionPos = FT.size(); 297 FT.push_back( 298 {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None}); 299 300 DecisionPos = FT.size(); 301 FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None}); 302 303 L = std::make_unique<Logger>( 304 FT, TensorSpec::createSpec<int64_t>(RewardName, {1}), 305 InlineSizeEstimatorAnalysis::isEvaluatorRequested()); 306 } 307 308 /// Log one inlining event. 309 void TrainingLogger::logInlineEvent(const InlineEvent &Event, 310 const MLModelRunner &ModelRunner) { 311 size_t CurrentFeature = 0; 312 for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) { 313 int64_t F = *ModelRunner.getTensor<int64_t>(CurrentFeature); 314 L->logInt64Value(CurrentFeature, &F); 315 } 316 317 for (size_t I = 1; I < OutputCount; ++I) { 318 const auto &Result = *MUTR->lastEvaluationResult(); 319 const char *RawData = 320 reinterpret_cast<const char *>(Result.getUntypedTensorValue(I)); 321 L->logSpecifiedTensorValue(CurrentFeature, RawData); 322 ++CurrentFeature; 323 } 324 325 assert(CurrentFeature == DefaultDecisionPos); 326 L->logInt64Value(DefaultDecisionPos, &Event.DefaultDecision); 327 L->logInt64Value(DecisionPos, &Event.AdvisedDecision); 328 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested()) 329 L->logInt64Reward(Event.Reward); 330 331 // For debugging / later use 332 Effects.push_back(Event.Effect); 333 } 334 335 void TrainingLogger::print() { 336 std::error_code EC; 337 raw_fd_ostream OutFile(LogFileName, EC); 338 L->flush(OutFile); 339 } 340 341 DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor( 342 Module &M, ModuleAnalysisManager &MAM, 343 
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice),
      // A model is available for inference iff the runner is a
      // ModelUnderTrainingRunner (i.e. a SavedModel path was supplied).
      IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
      Logger(std::move(Logger)),
      // Only pay for the module-wide size walk when we are logging.
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  // Emit the accumulated training log, if any, when the advisor goes away.
  if (isLogging())
    Logger->print();
}

/// Returns None when the size estimator was not requested; otherwise the
/// estimate for \p F, or 0 (after emitting a context error) when the
/// analysis produced no result.
Optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return None;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

/// Mandatory sites always get a "true" recommendation; the advice object is
/// still a LoggingMLInlineAdvice, but with Mandatory set, its log() is a
/// no-op (trivial decisions carry no training signal).
std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory*/ true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  // Mode 3 (inference, no logging): defer entirely to the base advisor.
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  // With a model available, ask it; otherwise log the default policy's call.
  auto Recommendation =
      IsDoingInference ?
static_cast<bool>(ModelRunner->evaluate<int64_t>()) 396 : DefaultAdvice; 397 return std::make_unique<LoggingMLInlineAdvice>( 398 /*Advisor=*/this, 399 /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation, 400 /*Logger=*/*Logger, 401 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), 402 /*CalleeSizeEstimateBefore=*/ 403 getNativeSizeEstimate(*CB.getCalledFunction()), 404 /*DefaultDecision=*/DefaultAdvice); 405 } 406 407 size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() { 408 if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) 409 return 0; 410 size_t Ret = 0; 411 for (auto &F : M) { 412 if (F.isDeclaration()) 413 continue; 414 Ret += *getNativeSizeEstimate(F); 415 } 416 return Ret; 417 } 418 419 std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor( 420 Module &M, ModuleAnalysisManager &MAM, 421 std::function<bool(CallBase &)> GetDefaultAdvice) { 422 auto &Ctx = M.getContext(); 423 std::unique_ptr<MLModelRunner> Runner; 424 if (TFModelUnderTrainingPath.empty()) 425 Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures())); 426 else 427 Runner = ModelUnderTrainingRunner::createAndEnsureValid( 428 Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(), 429 TFOutputSpecOverride); 430 if (!Runner) 431 return nullptr; 432 std::unique_ptr<TrainingLogger> Logger; 433 if (!TrainingLog.empty()) 434 Logger = std::make_unique<TrainingLogger>( 435 TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get())); 436 437 return std::make_unique<DevelopmentModeMLInlineAdvisor>( 438 M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger)); 439 } 440 #endif // defined(LLVM_HAVE_TF_API) 441