//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using TensorFlow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/TensorSpec.h"
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TFLITE)

#include "llvm/ADT/BitVector.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/Analysis/Utils/TrainingLogger.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <limits>
#include <optional>
#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc(R"(Path to SavedModel from the previous training iteration.
The directory is also expected to contain a JSON specification of the
outputs expected to be logged, where the first entry must be the
inlining decision. The file containing the specification should be
called output_spec.json. The expected JSON value is an array of
dictionaries. Each dictionary should have 2 keys:

- "tensor_spec", followed by the TensorSpec description of the
  output; and
- "logging_name", a string indicating the name to use when
  logging the output values.

Example:
[
  {
    "logging_name" : "some_name",
    "tensor_spec" : {
      "name" : "model_name",
      "port" : 0,
      "shape" : [2, 3],
      "type" : "float"
    }
  }
]

The first value must always correspond to the decision.)"));

static cl::opt<std::string> TFOutputSpecOverride(
    "ml-inliner-output-spec-override", cl::Hidden,
    cl::desc("Override the path to the output spec json file. See "
             "-ml-inliner-model-under-training documentation for the "
             "specification of that file."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  int64_t DefaultDecision = 0;

  /// What we advised. When training off the default policy, this is the
  /// same as DefaultDecision.
  int64_t AdvisedDecision = 0;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true; otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// The change in size: size_after - size_before.
  int64_t Reward = 0;
};
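
// Illustrative example (values are hypothetical, not from any real run): for a
// call site where both the default policy and the model advise inlining, the
// inline succeeds, and the caller's native size estimate shrinks by 40, the
// logged event would be populated as:
//
//   InlineEvent Event;
//   Event.DefaultDecision = 1; // the default policy would also have inlined
//   Event.AdvisedDecision = 1; // we advised inlining
//   Event.Effect = true;       // the inline actually happened
//   Event.Reward = -40;        // size_after - size_before; negative == smaller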

/// Collect data we may use for training a model.
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

private:
  StringRef LogFileName;
  const ModelUnderTrainingRunner *const MUTR;
  std::unique_ptr<Logger> L;
  BitVector Effects;
  /// Set these two clearly out-of-bounds, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};
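
// Sketch of the intended call sequence (the surrounding advisor wires this up;
// the names below are only illustrative): one TrainingLogger is created per
// module and fed one event per non-mandatory inlining decision.
//
//   TrainingLogger TL(TrainingLog, MUTR); // writes to the -training-log path
//   ...
//   TL.logInlineEvent(Event, Advisor.getModelRunner());
//
// logInlineEvent snapshots the feature tensors currently held by the model
// runner, so it must run before the runner is re-populated for the next call
// site.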

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is only concerned with producing training data
/// ("logs"). This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster when starting from a
/// reasonable policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation: the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model, compile it in 'release' mode,
/// validate it, and then potentially discard it, it's easier to just pass the
/// model to the compiler, albeit compilation would be slower, as a one-off.
/// Once the model behaves satisfactorily, it can be compiled AOT, for
/// efficiency, in release mode. The expectation is that a well-trained model
/// provides a good policy over a sufficiently diverse codebase, over many
/// changes (i.e. training happens seldom).
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice,
      std::unique_ptr<TrainingLogger> Logger);

  size_t getTotalSizeEstimate();

  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  void resetNativeSize(Function *F) {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<InlineSizeEstimatorAnalysis>();
    FAM.invalidate(*F, PA);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  std::optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  std::function<bool(CallBase &)> GetDefaultAdvice;
  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  const std::optional<size_t> InitialNativeSize;
  std::optional<size_t> CurrentNativeSize;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger,
                        std::optional<size_t> CallerSizeEstimateBefore,
                        std::optional<size_t> CalleeSizeEstimateBefore,
                        bool DefaultDecision, bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
                            *CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    } else {
      log(NoReward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const std::optional<size_t> CallerSizeEstimateBefore;
  const std::optional<size_t> CalleeSizeEstimateBefore;
  const int64_t DefaultDecision;
  const bool Mandatory;
};

static const std::vector<TensorSpec> TrainingOnlyFeatures{
    TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
    TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};

static const std::vector<TensorSpec> getInputFeatures() {
  std::vector<TensorSpec> InputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(TensorSpec::createSpec<int64_t>(
        TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape()));
  append_range(InputSpecs, TrainingOnlyFeatures);
  return InputSpecs;
}

} // namespace

TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR)
    : LogFileName(LogFileName), MUTR(MUTR) {
  // The first output is the inlining decision.
  std::vector<TensorSpec> FT(FeatureMap.begin(), FeatureMap.end());

  if (MUTR)
    append_range(FT, MUTR->extraOutputsForLoggingSpecs());

  DefaultDecisionPos = FT.size();
  FT.push_back(TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}));

  DecisionPos = FT.size();
  FT.push_back(TensorSpec::createSpec<int64_t>(DecisionName, {1}));
  std::error_code EC;
  auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
  if (EC)
    dbgs() << (EC.message() + ":" + TrainingLog);

  L = std::make_unique<Logger>(
      std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
      InlineSizeEstimatorAnalysis::isEvaluatorRequested());
  L->switchContext("");
}
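
// Resulting tensor layout from the constructor above, sketched for a build
// with N input features and K extra model outputs (K == 0 when there is no
// model under training):
//
//   FT[0 .. N-1]    : the FeatureMap input features
//   FT[N .. N+K-1]  : MUTR's extra outputs for logging, if any
//   FT[N+K]         : DefaultDecisionName (DefaultDecisionPos == N + K)
//   FT[N+K+1]       : DecisionName        (DecisionPos == N + K + 1)
//
// logInlineEvent below depends on exactly this ordering: it advances
// CurrentFeature across the features and extra outputs, then asserts
// CurrentFeature == DefaultDecisionPos.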

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  L->startObservation();
  size_t CurrentFeature = 0;
  for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature)
    L->logTensorValue(CurrentFeature,
                      reinterpret_cast<const char *>(
                          ModelRunner.getTensorUntyped(CurrentFeature)));

  if (MUTR)
    for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) {
      const char *RawData =
          reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I));
      L->logTensorValue(CurrentFeature, RawData);
      ++CurrentFeature;
    }

  assert(CurrentFeature == DefaultDecisionPos);
  L->logTensorValue(DefaultDecisionPos,
                    reinterpret_cast<const char *>(&Event.DefaultDecision));
  L->logTensorValue(DecisionPos,
                    reinterpret_cast<const char *>(&Event.AdvisedDecision));
  L->endObservation();
  if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    L->logReward(Event.Reward);

  // For debugging / later use.
  Effects.push_back(Event.Effect);
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice),
      IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
      Logger(std::move(Logger)),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

std::optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return std::nullopt;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation =
      IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
                       : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return 0;
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    Ret += *getNativeSizeEstimate(F);
  }
  return Ret;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  std::unique_ptr<MLModelRunner> Runner;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures()));
  else
    Runner = ModelUnderTrainingRunner::createAndEnsureValid(
        Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(),
        TFOutputSpecOverride);
  if (!Runner)
    return nullptr;
  std::unique_ptr<TrainingLogger> Logger;
  if (!TrainingLog.empty())
    Logger = std::make_unique<TrainingLogger>(
        TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get()));

  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger));
}
#endif // defined(LLVM_HAVE_TFLITE)
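
// Usage sketch for the three modes documented on
// DevelopmentModeMLInlineAdvisor (paths are hypothetical; the development-mode
// advisor itself is selected with -enable-ml-inliner=development):
//
//   1) Log the default policy (no model, logging only):
//        -training-log=/tmp/inline.log
//   2) Log a model under training (inference + logging):
//        -training-log=/tmp/inline.log \
//        -ml-inliner-model-under-training=/path/to/saved_model
//   3) Validate a candidate model (inference only, no logging):
//        -ml-inliner-model-under-training=/path/to/saved_model
//
// Passing neither flag trips the "neither inference nor logging" assertion in
// DevelopmentModeMLInlineAdvisor's constructor.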