1 //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a model runner using TFLite, allowing the 10 // loading of a model from a command line option. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "llvm/Analysis/TensorSpec.h" 14 #include "llvm/Config/config.h" 15 #if defined(LLVM_HAVE_TFLITE) 16 17 #include "llvm/ADT/BitVector.h" 18 #include "llvm/Analysis/CallGraph.h" 19 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" 20 #include "llvm/Analysis/MLInlineAdvisor.h" 21 #include "llvm/Analysis/ModelUnderTrainingRunner.h" 22 #include "llvm/Analysis/NoInferenceModelRunner.h" 23 #include "llvm/Analysis/Utils/TFUtils.h" 24 #include "llvm/Analysis/Utils/TrainingLogger.h" 25 #include "llvm/IR/LLVMContext.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/ManagedStatic.h" 28 29 #include <vector> 30 #include <optional> 31 32 using namespace llvm; 33 34 static cl::opt<std::string> TrainingLog( 35 "training-log", cl::Hidden, 36 cl::desc("Path where the development - mode inlining log is saved.")); 37 38 static cl::opt<std::string> TFModelUnderTrainingPath( 39 "ml-inliner-model-under-training", cl::Hidden, 40 cl::desc(R"(Path to SavedModel from the previous training iteration. 41 The directory is also expected to contain a JSON specification of the 42 outputs expected to be logged, where the first entry must be the 43 inlining decision. The file containing the specification should be 44 called output_spec.json. The expected JSON value is an array of 45 dictionaries. Each dictionary should have 2 keys: 46 47 - "tensor_spec, followed by the TensorSpec description of the 48 output; and 49 - "logging_name", a string indicating the name to use when 50 logging the output values. 51 52 Example: 53 [ 54 { 55 "logging_name" : "some_name", 56 "tensor_spec" : { 57 "name" : "model_name", 58 "port" : 0, 59 "shape" : [2, 3], 60 "type" : "float" 61 } 62 } 63 ] 64 65 The first value must always correspond to the decision.)")); 66 67 static cl::opt<std::string> TFOutputSpecOverride( 68 "ml-inliner-output-spec-override", cl::Hidden, 69 cl::desc("Override the path to the output spec json file. See " 70 "-ml-inliner-model-under-training documentation for the " 71 "specification of that file.")); 72 73 static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix", 74 cl::Hidden, cl::init("action_"), 75 cl::desc("Prefix for feature names.")); 76 77 namespace { 78 /// An InlineEvent, used by TrainingLogger. 79 struct InlineEvent { 80 /// What the default policy's decision would have been. 81 int64_t DefaultDecision = 0; 82 83 /// What we advised. When training off the default policy, this is the same as 84 /// DefaultDecision. 85 int64_t AdvisedDecision = 0; 86 87 /// What actually happened. This would be 'false' in the case of an inline 88 /// error, even if AdvisedDecision were true, otherwise it agrees with 89 /// AdvisedDecision. 90 bool Effect = false; 91 92 /// What the change in size was: size_after - size_before 93 int64_t Reward = 0; 94 }; 95 96 /// Collect data we may use for training a model. 97 class TrainingLogger final { 98 public: 99 TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR); 100 101 /// Log one inlining event. 102 void logInlineEvent(const InlineEvent &Event, 103 const MLModelRunner &ModelRunner); 104 105 private: 106 StringRef LogFileName; 107 const ModelUnderTrainingRunner *const MUTR; 108 std::unique_ptr<Logger> L; 109 BitVector Effects; 110 /// Set these 2 clearly OOB, to make sure we set them later. 111 size_t DefaultDecisionPos = std::numeric_limits<size_t>::max(); 112 size_t DecisionPos = std::numeric_limits<size_t>::max(); 113 }; 114 115 /// An extension of the MLInlineAdvisor for the 'development' mode, targeting 116 /// the offline training scenario. Note that training happens outside of the 117 /// compiler, this facility is concerned with producing training data ("logs"). 118 /// This InlineAdvisor can operate in the following modes: 119 /// 120 /// 1) collect logs for the default policy. This is useful for bootstrapping 121 /// training, which will be considerably faster by starting from a reasonable 122 /// policy. 123 /// 124 /// 2) collect logs for the ML policy, using a model from a previous 125 /// training. Potentially, that model uses internally some small random 126 /// perturbation of its weights, to induce exploration (setting this up is the 127 /// responsibility of the training algorithm). The logs would then be used to 128 /// retrain and improve on this model. 129 /// 130 /// 3) use the provided model, with no logging. This is useful for end to end 131 /// validation - the model, in this case, is a release candidate and shouldn't 132 /// have random perturbations. It is a convenience feature: rather than needing 133 /// to take the release candidate model and compile it in 'release' mode, 134 /// validate it, then potentially discard it, it's easier to just pass the model 135 /// to the compiler, albeit compilation would be slower, as a one-off. Once the 136 /// model behaves satisfactorily, it can be compiled AOT, for efficiency, in 137 /// release mode. The expectation is that a well-trained model provides a good 138 /// policy over a sufficiently diverse codebase, over many changes (i.e. 139 /// training happens seldom). 140 class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor { 141 public: 142 DevelopmentModeMLInlineAdvisor( 143 Module &M, ModuleAnalysisManager &MAM, 144 std::unique_ptr<MLModelRunner> ModelRunner, 145 std::function<bool(CallBase &)> GetDefaultAdvice, 146 std::unique_ptr<TrainingLogger> Logger); 147 148 size_t getTotalSizeEstimate(); 149 150 void updateNativeSizeEstimate(int64_t Change) { 151 *CurrentNativeSize += Change; 152 } 153 void resetNativeSize(Function *F) { 154 PreservedAnalyses PA = PreservedAnalyses::all(); 155 PA.abandon<InlineSizeEstimatorAnalysis>(); 156 FAM.invalidate(*F, PA); 157 } 158 159 std::unique_ptr<MLInlineAdvice> 160 getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override; 161 162 std::optional<size_t> getNativeSizeEstimate(const Function &F) const; 163 164 private: 165 bool isLogging() const { return !!Logger; } 166 std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override; 167 168 const bool IsDoingInference; 169 std::unique_ptr<TrainingLogger> Logger; 170 171 const std::optional<int32_t> InitialNativeSize; 172 std::optional<int32_t> CurrentNativeSize; 173 }; 174 175 /// A variant of MLInlineAdvice that tracks all non-trivial inlining 176 /// decisions, for training/logging. 177 class LoggingMLInlineAdvice : public MLInlineAdvice { 178 public: 179 LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB, 180 OptimizationRemarkEmitter &ORE, bool Recommendation, 181 TrainingLogger &Logger, 182 std::optional<size_t> CallerSizeEstimateBefore, 183 std::optional<size_t> CalleeSizeEstimateBefore, 184 bool DefaultDecision, bool Mandatory = false) 185 : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger), 186 CallerSizeEstimateBefore(CallerSizeEstimateBefore), 187 CalleeSizeEstimateBefore(CalleeSizeEstimateBefore), 188 DefaultDecision(DefaultDecision), Mandatory(Mandatory) {} 189 190 virtual ~LoggingMLInlineAdvice() = default; 191 192 private: 193 DevelopmentModeMLInlineAdvisor *getAdvisor() const { 194 return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor); 195 } 196 void recordInliningImpl() override { 197 MLInlineAdvice::recordInliningImpl(); 198 getAdvisor()->resetNativeSize(Caller); 199 int Reward = std::numeric_limits<int>::max(); 200 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && 201 !getAdvisor()->isForcedToStop()) { 202 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) + 203 *CalleeSizeEstimateBefore; 204 Reward = NativeSizeAfter - 205 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); 206 getAdvisor()->updateNativeSizeEstimate(Reward); 207 } 208 log(Reward, /*Success=*/true); 209 } 210 211 void recordInliningWithCalleeDeletedImpl() override { 212 MLInlineAdvice::recordInliningWithCalleeDeletedImpl(); 213 getAdvisor()->resetNativeSize(Caller); 214 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && 215 !getAdvisor()->isForcedToStop()) { 216 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller); 217 int Reward = NativeSizeAfter - 218 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); 219 getAdvisor()->updateNativeSizeEstimate(Reward); 220 log(Reward, /*Success=*/true); 221 } else { 222 log(NoReward, /*Success=*/true); 223 } 224 } 225 226 void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { 227 MLInlineAdvice::recordUnsuccessfulInliningImpl(Result); 228 log(NoReward, /*Success=*/false); 229 } 230 231 void recordUnattemptedInliningImpl() override { 232 MLInlineAdvice::recordUnattemptedInliningImpl(); 233 log(NoReward, /*Success=*/false); 234 } 235 236 void log(int64_t Reward, bool Success) { 237 if (Mandatory) 238 return; 239 InlineEvent Event; 240 Event.AdvisedDecision = isInliningRecommended(); 241 Event.DefaultDecision = DefaultDecision; 242 Event.Effect = Success; 243 Event.Reward = Reward; 244 Logger.logInlineEvent(Event, getAdvisor()->getModelRunner()); 245 } 246 247 static const int64_t NoReward = 0; 248 TrainingLogger &Logger; 249 const std::optional<size_t> CallerSizeEstimateBefore; 250 const std::optional<size_t> CalleeSizeEstimateBefore; 251 const int64_t DefaultDecision; 252 const int64_t Mandatory; 253 }; 254 255 static const std::vector<TensorSpec> TrainingOnlyFeatures{ 256 TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}), 257 TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}), 258 TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}), 259 TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})}; 260 261 static const std::vector<TensorSpec> getInputFeatures() { 262 std::vector<TensorSpec> InputSpecs; 263 for (size_t I = 0; I < NumberOfFeatures; ++I) 264 InputSpecs.push_back(TensorSpec::createSpec<int64_t>( 265 TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape())); 266 append_range(InputSpecs, TrainingOnlyFeatures); 267 return InputSpecs; 268 } 269 270 } // namespace 271 272 TrainingLogger::TrainingLogger(StringRef LogFileName, 273 const ModelUnderTrainingRunner *MUTR) 274 : LogFileName(LogFileName), MUTR(MUTR) { 275 // The first output is the inlining decision. 276 std::vector<TensorSpec> FT(FeatureMap.begin(), FeatureMap.end()); 277 278 if (MUTR) 279 append_range(FT, MUTR->extraOutputsForLoggingSpecs()); 280 281 DefaultDecisionPos = FT.size(); 282 FT.push_back(DefaultDecisionSpec); 283 284 DecisionPos = FT.size(); 285 FT.push_back(InlineDecisionSpec); 286 std::error_code EC; 287 auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC); 288 if (EC) 289 dbgs() << (EC.message() + ":" + TrainingLog); 290 291 L = std::make_unique<Logger>( 292 std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}), 293 InlineSizeEstimatorAnalysis::isEvaluatorRequested()); 294 L->switchContext(""); 295 } 296 297 /// Log one inlining event. 298 void TrainingLogger::logInlineEvent(const InlineEvent &Event, 299 const MLModelRunner &ModelRunner) { 300 L->startObservation(); 301 size_t CurrentFeature = 0; 302 for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) 303 L->logTensorValue(CurrentFeature, 304 reinterpret_cast<const char *>( 305 ModelRunner.getTensorUntyped(CurrentFeature))); 306 307 if (MUTR) 308 for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) { 309 const char *RawData = 310 reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)); 311 L->logTensorValue(CurrentFeature, RawData); 312 ++CurrentFeature; 313 } 314 315 assert(CurrentFeature == DefaultDecisionPos); 316 L->logTensorValue(DefaultDecisionPos, 317 reinterpret_cast<const char *>(&Event.DefaultDecision)); 318 L->logTensorValue(DecisionPos, 319 reinterpret_cast<const char *>(&Event.AdvisedDecision)); 320 L->endObservation(); 321 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested()) 322 L->logReward(Event.Reward); 323 324 // For debugging / later use 325 Effects.push_back(Event.Effect); 326 } 327 328 DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor( 329 Module &M, ModuleAnalysisManager &MAM, 330 std::unique_ptr<MLModelRunner> ModelRunner, 331 std::function<bool(CallBase &)> GetDefaultAdvice, 332 std::unique_ptr<TrainingLogger> Logger) 333 : MLInlineAdvisor(M, MAM, std::move(ModelRunner), GetDefaultAdvice), 334 IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())), 335 Logger(std::move(Logger)), 336 InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0), 337 CurrentNativeSize(InitialNativeSize) { 338 // We cannot have the case of neither inference nor logging. 339 assert(IsDoingInference || isLogging()); 340 } 341 342 std::optional<size_t> 343 DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const { 344 if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) 345 return std::nullopt; 346 auto &R = 347 FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F)); 348 if (!R) { 349 F.getParent()->getContext().emitError( 350 "Native size estimator is not present."); 351 return 0; 352 } 353 return *R; 354 } 355 356 std::unique_ptr<MLInlineAdvice> 357 DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { 358 return std::make_unique<LoggingMLInlineAdvice>( 359 /*Advisor=*/this, 360 /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true, 361 /*Logger=*/*Logger, 362 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), 363 /*CalleeSizeEstimateBefore=*/ 364 getNativeSizeEstimate(*CB.getCalledFunction()), 365 /*DefaultDecision=*/true, /*Mandatory*/ true); 366 } 367 368 std::unique_ptr<MLInlineAdvice> 369 DevelopmentModeMLInlineAdvisor::getAdviceFromModel( 370 CallBase &CB, OptimizationRemarkEmitter &ORE) { 371 if (IsDoingInference && !isLogging()) 372 return MLInlineAdvisor::getAdviceFromModel(CB, ORE); 373 374 bool DefaultAdvice = GetDefaultAdvice(CB); 375 auto Recommendation = 376 IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>()) 377 : DefaultAdvice; 378 return std::make_unique<LoggingMLInlineAdvice>( 379 /*Advisor=*/this, 380 /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation, 381 /*Logger=*/*Logger, 382 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), 383 /*CalleeSizeEstimateBefore=*/ 384 getNativeSizeEstimate(*CB.getCalledFunction()), 385 /*DefaultDecision=*/DefaultAdvice); 386 } 387 388 size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() { 389 if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) 390 return 0; 391 size_t Ret = 0; 392 for (auto &F : M) { 393 if (F.isDeclaration()) 394 continue; 395 Ret += *getNativeSizeEstimate(F); 396 } 397 return Ret; 398 } 399 400 std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor( 401 Module &M, ModuleAnalysisManager &MAM, 402 std::function<bool(CallBase &)> GetDefaultAdvice) { 403 auto &Ctx = M.getContext(); 404 std::unique_ptr<MLModelRunner> Runner; 405 if (TFModelUnderTrainingPath.empty()) 406 Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures())); 407 else 408 Runner = ModelUnderTrainingRunner::createAndEnsureValid( 409 Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(), 410 TFOutputSpecOverride); 411 if (!Runner) 412 return nullptr; 413 std::unique_ptr<TrainingLogger> Logger; 414 if (!TrainingLog.empty()) 415 Logger = std::make_unique<TrainingLogger>( 416 TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get())); 417 418 return std::make_unique<DevelopmentModeMLInlineAdvisor>( 419 M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger)); 420 } 421 #endif // defined(LLVM_HAVE_TFLITE) 422