1 //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a model runner using TFLite, allowing the 10 // loading of a model from a command line option. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "llvm/Analysis/TensorSpec.h" 14 #include "llvm/Config/config.h" 15 #if defined(LLVM_HAVE_TFLITE) 16 17 #include "llvm/ADT/BitVector.h" 18 #include "llvm/Analysis/CallGraph.h" 19 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" 20 #include "llvm/Analysis/MLInlineAdvisor.h" 21 #include "llvm/Analysis/ModelUnderTrainingRunner.h" 22 #include "llvm/Analysis/NoInferenceModelRunner.h" 23 #include "llvm/Analysis/Utils/TFUtils.h" 24 #include "llvm/Analysis/Utils/TrainingLogger.h" 25 #include "llvm/IR/LLVMContext.h" 26 #include "llvm/IR/Module.h" 27 #include "llvm/Support/CommandLine.h" 28 #include "llvm/Support/ManagedStatic.h" 29 30 #include <vector> 31 #include <optional> 32 33 using namespace llvm; 34 35 static cl::opt<std::string> TrainingLog( 36 "training-log", cl::Hidden, 37 cl::desc("Path where the development - mode inlining log is saved.")); 38 39 static cl::opt<std::string> TFModelUnderTrainingPath( 40 "ml-inliner-model-under-training", cl::Hidden, 41 cl::desc(R"(Path to SavedModel from the previous training iteration. 42 The directory is also expected to contain a JSON specification of the 43 outputs expected to be logged, where the first entry must be the 44 inlining decision. The file containing the specification should be 45 called output_spec.json. The expected JSON value is an array of 46 dictionaries. Each dictionary should have 2 keys: 47 48 - "tensor_spec, followed by the TensorSpec description of the 49 output; and 50 - "logging_name", a string indicating the name to use when 51 logging the output values. 52 53 Example: 54 [ 55 { 56 "logging_name" : "some_name", 57 "tensor_spec" : { 58 "name" : "model_name", 59 "port" : 0, 60 "shape" : [2, 3], 61 "type" : "float" 62 } 63 } 64 ] 65 66 The first value must always correspond to the decision.)")); 67 68 static cl::opt<std::string> TFOutputSpecOverride( 69 "ml-inliner-output-spec-override", cl::Hidden, 70 cl::desc("Override the path to the output spec json file. See " 71 "-ml-inliner-model-under-training documentation for the " 72 "specification of that file.")); 73 74 static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix", 75 cl::Hidden, cl::init("action_"), 76 cl::desc("Prefix for feature names.")); 77 78 namespace { 79 /// An InlineEvent, used by TrainingLogger. 80 struct InlineEvent { 81 /// What the default policy's decision would have been. 82 int64_t DefaultDecision = 0; 83 84 /// What we advised. When training off the default policy, this is the same as 85 /// DefaultDecision. 86 int64_t AdvisedDecision = 0; 87 88 /// What actually happened. This would be 'false' in the case of an inline 89 /// error, even if AdvisedDecision were true, otherwise it agrees with 90 /// AdvisedDecision. 91 bool Effect = false; 92 93 /// What the change in size was: size_after - size_before 94 int64_t Reward = 0; 95 }; 96 97 /// Collect data we may use for training a model. 98 class TrainingLogger final { 99 public: 100 TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR); 101 102 /// Log one inlining event. 103 void logInlineEvent(const InlineEvent &Event, 104 const MLModelRunner &ModelRunner); 105 106 private: 107 StringRef LogFileName; 108 const ModelUnderTrainingRunner *const MUTR; 109 std::unique_ptr<Logger> L; 110 BitVector Effects; 111 /// Set these 2 clearly OOB, to make sure we set them later. 112 size_t DefaultDecisionPos = std::numeric_limits<size_t>::max(); 113 size_t DecisionPos = std::numeric_limits<size_t>::max(); 114 }; 115 116 /// An extension of the MLInlineAdvisor for the 'development' mode, targeting 117 /// the offline training scenario. Note that training happens outside of the 118 /// compiler, this facility is concerned with producing training data ("logs"). 119 /// This InlineAdvisor can operate in the following modes: 120 /// 121 /// 1) collect logs for the default policy. This is useful for bootstrapping 122 /// training, which will be considerably faster by starting from a reasonable 123 /// policy. 124 /// 125 /// 2) collect logs for the ML policy, using a model from a previous 126 /// training. Potentially, that model uses internally some small random 127 /// perturbation of its weights, to induce exploration (setting this up is the 128 /// responsibility of the training algorithm). The logs would then be used to 129 /// retrain and improve on this model. 130 /// 131 /// 3) use the provided model, with no logging. This is useful for end to end 132 /// validation - the model, in this case, is a release candidate and shouldn't 133 /// have random perturbations. It is a convenience feature: rather than needing 134 /// to take the release candidate model and compile it in 'release' mode, 135 /// validate it, then potentially discard it, it's easier to just pass the model 136 /// to the compiler, albeit compilation would be slower, as a one-off. Once the 137 /// model behaves satisfactorily, it can be compiled AOT, for efficiency, in 138 /// release mode. The expectation is that a well-trained model provides a good 139 /// policy over a sufficiently diverse codebase, over many changes (i.e. 140 /// training happens seldom). 141 class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor { 142 public: 143 DevelopmentModeMLInlineAdvisor( 144 Module &M, ModuleAnalysisManager &MAM, 145 std::unique_ptr<MLModelRunner> ModelRunner, 146 std::function<bool(CallBase &)> GetDefaultAdvice, 147 std::unique_ptr<TrainingLogger> Logger); 148 149 size_t getTotalSizeEstimate(); 150 151 void updateNativeSizeEstimate(int64_t Change) { 152 *CurrentNativeSize += Change; 153 } 154 void resetNativeSize(Function *F) { 155 PreservedAnalyses PA = PreservedAnalyses::all(); 156 PA.abandon<InlineSizeEstimatorAnalysis>(); 157 FAM.invalidate(*F, PA); 158 } 159 160 std::unique_ptr<MLInlineAdvice> 161 getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override; 162 163 std::optional<size_t> getNativeSizeEstimate(const Function &F) const; 164 165 private: 166 bool isLogging() const { return !!Logger; } 167 std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override; 168 169 const bool IsDoingInference; 170 std::unique_ptr<TrainingLogger> Logger; 171 172 const std::optional<int32_t> InitialNativeSize; 173 std::optional<int32_t> CurrentNativeSize; 174 }; 175 176 /// A variant of MLInlineAdvice that tracks all non-trivial inlining 177 /// decisions, for training/logging. 178 class LoggingMLInlineAdvice : public MLInlineAdvice { 179 public: 180 LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB, 181 OptimizationRemarkEmitter &ORE, bool Recommendation, 182 TrainingLogger &Logger, 183 std::optional<size_t> CallerSizeEstimateBefore, 184 std::optional<size_t> CalleeSizeEstimateBefore, 185 bool DefaultDecision, bool Mandatory = false) 186 : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger), 187 CallerSizeEstimateBefore(CallerSizeEstimateBefore), 188 CalleeSizeEstimateBefore(CalleeSizeEstimateBefore), 189 DefaultDecision(DefaultDecision), Mandatory(Mandatory) {} 190 191 virtual ~LoggingMLInlineAdvice() = default; 192 193 private: 194 DevelopmentModeMLInlineAdvisor *getAdvisor() const { 195 return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor); 196 } 197 void recordInliningImpl() override { 198 MLInlineAdvice::recordInliningImpl(); 199 getAdvisor()->resetNativeSize(Caller); 200 int Reward = std::numeric_limits<int>::max(); 201 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && 202 !getAdvisor()->isForcedToStop()) { 203 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) + 204 *CalleeSizeEstimateBefore; 205 Reward = NativeSizeAfter - 206 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); 207 getAdvisor()->updateNativeSizeEstimate(Reward); 208 } 209 log(Reward, /*Success=*/true); 210 } 211 212 void recordInliningWithCalleeDeletedImpl() override { 213 MLInlineAdvice::recordInliningWithCalleeDeletedImpl(); 214 getAdvisor()->resetNativeSize(Caller); 215 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && 216 !getAdvisor()->isForcedToStop()) { 217 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller); 218 int Reward = NativeSizeAfter - 219 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); 220 getAdvisor()->updateNativeSizeEstimate(Reward); 221 log(Reward, /*Success=*/true); 222 } else { 223 log(NoReward, /*Success=*/true); 224 } 225 } 226 227 void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { 228 MLInlineAdvice::recordUnsuccessfulInliningImpl(Result); 229 log(NoReward, /*Success=*/false); 230 } 231 232 void recordUnattemptedInliningImpl() override { 233 MLInlineAdvice::recordUnattemptedInliningImpl(); 234 log(NoReward, /*Success=*/false); 235 } 236 237 void log(int64_t Reward, bool Success) { 238 if (Mandatory) 239 return; 240 InlineEvent Event; 241 Event.AdvisedDecision = isInliningRecommended(); 242 Event.DefaultDecision = DefaultDecision; 243 Event.Effect = Success; 244 Event.Reward = Reward; 245 Logger.logInlineEvent(Event, getAdvisor()->getModelRunner()); 246 } 247 248 static const int64_t NoReward = 0; 249 TrainingLogger &Logger; 250 const std::optional<size_t> CallerSizeEstimateBefore; 251 const std::optional<size_t> CalleeSizeEstimateBefore; 252 const int64_t DefaultDecision; 253 const int64_t Mandatory; 254 }; 255 256 static const std::vector<TensorSpec> TrainingOnlyFeatures{ 257 TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}), 258 TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}), 259 TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})}; 260 261 static const std::vector<TensorSpec> getInputFeatures() { 262 std::vector<TensorSpec> InputSpecs; 263 for (size_t I = 0; I < NumberOfFeatures; ++I) 264 InputSpecs.push_back(TensorSpec::createSpec<int64_t>( 265 TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape())); 266 append_range(InputSpecs, TrainingOnlyFeatures); 267 return InputSpecs; 268 } 269 270 } // namespace 271 272 TrainingLogger::TrainingLogger(StringRef LogFileName, 273 const ModelUnderTrainingRunner *MUTR) 274 : LogFileName(LogFileName), MUTR(MUTR) { 275 // The first output is the inlining decision. 276 std::vector<TensorSpec> FT(FeatureMap.begin(), FeatureMap.end()); 277 278 if (MUTR) 279 append_range(FT, MUTR->extraOutputsForLoggingSpecs()); 280 281 DefaultDecisionPos = FT.size(); 282 FT.push_back(DefaultDecisionSpec); 283 284 DecisionPos = FT.size(); 285 FT.push_back(InlineDecisionSpec); 286 std::error_code EC; 287 auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC); 288 if (EC) 289 dbgs() << (EC.message() + ":" + TrainingLog); 290 291 L = std::make_unique<Logger>( 292 std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}), 293 InlineSizeEstimatorAnalysis::isEvaluatorRequested()); 294 L->switchContext(""); 295 } 296 297 /// Log one inlining event. 298 void TrainingLogger::logInlineEvent(const InlineEvent &Event, 299 const MLModelRunner &ModelRunner) { 300 L->startObservation(); 301 size_t CurrentFeature = 0; 302 for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) 303 L->logTensorValue(CurrentFeature, 304 reinterpret_cast<const char *>( 305 ModelRunner.getTensorUntyped(CurrentFeature))); 306 307 if (MUTR) 308 for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) { 309 const char *RawData = 310 reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)); 311 L->logTensorValue(CurrentFeature, RawData); 312 ++CurrentFeature; 313 } 314 315 assert(CurrentFeature == DefaultDecisionPos); 316 L->logTensorValue(DefaultDecisionPos, 317 reinterpret_cast<const char *>(&Event.DefaultDecision)); 318 L->logTensorValue(DecisionPos, 319 reinterpret_cast<const char *>(&Event.AdvisedDecision)); 320 L->endObservation(); 321 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested()) 322 L->logReward(Event.Reward); 323 324 // For debugging / later use 325 Effects.push_back(Event.Effect); 326 } 327 328 DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor( 329 Module &M, ModuleAnalysisManager &MAM, 330 std::unique_ptr<MLModelRunner> ModelRunner, 331 std::function<bool(CallBase &)> GetDefaultAdvice, 332 std::unique_ptr<TrainingLogger> Logger) 333 : MLInlineAdvisor(M, MAM, std::move(ModelRunner), GetDefaultAdvice), 334 IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())), 335 Logger(std::move(Logger)), 336 InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0), 337 CurrentNativeSize(InitialNativeSize) { 338 // We cannot have the case of neither inference nor logging. 339 assert(IsDoingInference || isLogging()); 340 } 341 342 std::optional<size_t> 343 DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const { 344 if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) 345 return std::nullopt; 346 auto &R = 347 FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F)); 348 if (!R) { 349 F.getParent()->getContext().emitError( 350 "Native size estimator is not present."); 351 return 0; 352 } 353 return *R; 354 } 355 356 std::unique_ptr<MLInlineAdvice> 357 DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { 358 return std::make_unique<LoggingMLInlineAdvice>( 359 /*Advisor=*/this, 360 /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true, 361 /*Logger=*/*Logger, 362 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), 363 /*CalleeSizeEstimateBefore=*/ 364 getNativeSizeEstimate(*CB.getCalledFunction()), 365 /*DefaultDecision=*/true, /*Mandatory*/ true); 366 } 367 368 std::unique_ptr<MLInlineAdvice> 369 DevelopmentModeMLInlineAdvisor::getAdviceFromModel( 370 CallBase &CB, OptimizationRemarkEmitter &ORE) { 371 if (IsDoingInference && !isLogging()) 372 return MLInlineAdvisor::getAdviceFromModel(CB, ORE); 373 374 bool DefaultAdvice = GetDefaultAdvice(CB); 375 auto Recommendation = 376 IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>()) 377 : DefaultAdvice; 378 return std::make_unique<LoggingMLInlineAdvice>( 379 /*Advisor=*/this, 380 /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation, 381 /*Logger=*/*Logger, 382 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), 383 /*CalleeSizeEstimateBefore=*/ 384 getNativeSizeEstimate(*CB.getCalledFunction()), 385 /*DefaultDecision=*/DefaultAdvice); 386 } 387 388 size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() { 389 if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) 390 return 0; 391 size_t Ret = 0; 392 for (auto &F : M) { 393 if (F.isDeclaration()) 394 continue; 395 Ret += *getNativeSizeEstimate(F); 396 } 397 return Ret; 398 } 399 400 std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor( 401 Module &M, ModuleAnalysisManager &MAM, 402 std::function<bool(CallBase &)> GetDefaultAdvice) { 403 auto &Ctx = M.getContext(); 404 std::unique_ptr<MLModelRunner> Runner; 405 if (TFModelUnderTrainingPath.empty()) 406 Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures())); 407 else 408 Runner = ModelUnderTrainingRunner::createAndEnsureValid( 409 Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(), 410 TFOutputSpecOverride); 411 if (!Runner) 412 return nullptr; 413 std::unique_ptr<TrainingLogger> Logger; 414 if (!TrainingLog.empty()) 415 Logger = std::make_unique<TrainingLogger>( 416 TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get())); 417 418 return std::make_unique<DevelopmentModeMLInlineAdvisor>( 419 M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger)); 420 } 421 #endif // defined(LLVM_HAVE_TFLITE) 422